## PyTorch Multilayer Perceptron

Create a subclass of the `torch.nn.Module` class and override the `forward()` method. This method defines how the network processes input and produces output.

The `MLP` class takes `input_size`, `hidden_size`, and `output_size` as arguments.
The `__init__()` method creates two fully-connected layers (`nn.Linear`) and a ReLU activation function (`nn.ReLU`). 
The `forward()` method applies these layers to the input data `x` and returns the output.

To train this network, we define a loss function, choose an optimizer, and iterate over the input data to update the model's weights. 

In [None]:
import torch
import torch.nn as nn

class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each input sample.
        hidden_size: number of units in the hidden layer.
        output_size: number of values produced for each sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Sub-modules are created in the order in which forward() applies them.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through fc1, the ReLU and fc2, and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Build an MLP with 784 input features, 256 hidden units and 10 outputs.
mlp = MLP(784, 256, 10)

# A random batch of 64 samples stands in for real data.
x = torch.randn((64, 784))

# Calling the module runs forward() on the batch.
output = mlp(x)


Adding code for optimizer

In [None]:
#!/usr/bin/env python

# Any copyright is dedicated to the Public Domain.
# https://creativecommons.org/publicdomain/zero/1.0/

# Written by Francois Fleuret <francois@fleuret.org>

from torch import Tensor

import sys

def exception_hook(exc_type, exc_value, tb):
    r'''Print a traceback that also shows all the local variables of
    each frame in case of RuntimeError or ValueError, and prints
    tensors as shape, dtype and device instead of their contents.

    Meant to be installed as ``sys.excepthook``.

    Args:
        exc_type: class of the uncaught exception.
        exc_value: the exception instance.
        tb: traceback object, walked from the outermost frame inwards.

    '''
    # Imported locally so the hook does not depend on the file's import block.
    import linecache

    repr_orig = Tensor.__repr__
    Tensor.__repr__ = lambda x: f'{x.size()}:{x.dtype}:{x.device}'

    try:
        while tb:
            print('--------------------------------------------------\n')
            filename = tb.tb_frame.f_code.co_filename
            name = tb.tb_frame.f_code.co_name
            line_no = tb.tb_lineno
            print(f'  File "{filename}", line {line_no}, in {name}')
            # linecache never raises: it yields '' when the source is not
            # available (e.g. code compiled from a string) and it does not
            # leave an open file handle behind.
            print(linecache.getline(filename, line_no, tb.tb_frame.f_globals))

            if exc_type in {RuntimeError, ValueError}:
                for n, v in tb.tb_frame.f_locals.items():
                    print(f'  {n} -> {v}')

            print()
            tb = tb.tb_next
    finally:
        # Always undo the monkey-patch, even if printing a frame failed.
        Tensor.__repr__ = repr_orig

    print(f'{exc_type.__name__}: {exc_value}')

# Route every uncaught exception through exception_hook.
sys.excepthook = exception_hook

######################################################################


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import random

# define the function to generate the data from
def f(x):
    """Evaluate the cubic 3.2*x^3 + 2*x^2 + 3*x at ``x``."""
    cubic_term = 3.2 * x**3
    quadratic_term = 2 * x**2
    linear_term = 3 * x
    return cubic_term + quadratic_term + linear_term

# Standard deviation of the Gaussian noise added to every target value.
noise_scale = 0.1

# Total number of (x, y) samples to generate.
num_samples = 1000


# Draw the inputs uniformly at random from [-1, 1].
x_values = [random.uniform(-1, 1) for _ in range(num_samples)]

# Targets are f(x) plus zero-mean Gaussian noise.
y_values = [f(x) + random.gauss(0, noise_scale) for x in x_values]

# The first 80% of the samples form the training set and the remaining 20%
# the test set.
split_index = int(0.8 * num_samples)
x_train, y_train = x_values[:split_index], y_values[:split_index]
x_test, y_test = x_values[split_index:], y_values[split_index:]

# define the dataset
class MyDataset(Dataset):
    """Dataset pairing each input value with its regression target.

    Samples are returned as float32 tensors with a trailing feature
    dimension, so that a DataLoader collates them into batches of shape
    (batch, 1). Plain Python floats would instead be collated into 1-D
    float64 batches, which a default nn.Linear(1, ...) layer cannot consume.

    Args:
        x_values: sequence of inputs (scalars or per-sample vectors).
        y_values: sequence of targets, aligned with ``x_values``.
    """

    def __init__(self, x_values, y_values):
        self.data = x_values
        self.labels = y_values

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(input, label)`` pair at ``index`` as 1-D float32 tensors."""
        # reshape(-1) turns a scalar into shape (1,) and leaves vectors flat.
        sample = torch.as_tensor(self.data[index], dtype=torch.float32).reshape(-1)
        label = torch.as_tensor(self.labels[index], dtype=torch.float32).reshape(-1)
        return sample, label

# define the neural network
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each input sample.
        hidden_size: number of units in the hidden layer.
        output_size: number of values produced for each sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through fc1, the ReLU and fc2, and return the result.

        The forward pass is silent: it does not print the batch shape, so
        training and evaluation loops produce no per-batch output.
        """
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Scalar regression: one input feature, 64 hidden units, one output value.
# (The earlier demo used input_size=784, hidden_size=256, output_size=10.)
mlp = MLP(input_size=1, hidden_size=64, output_size=1)
# Wrap both splits in datasets and batch them 64 samples at a time; only the
# training loader shuffles.
train_dataset = MyDataset(x_train, y_train)
train_data_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataset = MyDataset(x_test, y_test)
test_data_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Mean squared error is the loss for this regression task (cross-entropy
# would be the choice for classification); the weights are updated by SGD
# with momentum.
criterion = nn.MSELoss()
optimizer = optim.SGD(mlp.parameters(), lr=0.01, momentum=0.9)

# Train the network for 10 passes over the training set.
for epoch in range(10):
    for data, labels in train_data_loader:
        # A linear layer needs input of shape (batch, input_size) in the same
        # dtype as its weights (float32 by default). Batches collated from
        # plain Python floats are 1-D float64, so normalise dtype and shape
        # first; batches that already have this layout are left unchanged.
        data = data.float().reshape(data.shape[0], -1)
        labels = labels.float().reshape(labels.shape[0], -1)

        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        output = mlp(data)
        loss = criterion(output, labels)

        # Backpropagate and update the weights.
        loss.backward()
        optimizer.step()



In [None]:
# Wrap the held-out split in a dataset so individual samples can be inspected.
dataset = MyDataset(x_test, y_test)

# Number of samples, then the first (input, label) pair.
print(f"The dataset has length {len(dataset)}")
print(f"The value of the first sample in the dataset is {dataset[0]}")
# NOTE(review): the length is reported a second time here, with different wording.
print(f"The dataset length is {len(dataset)}")
# "Width" is the number of items __getitem__ returns per sample (input and label).
print(f"The dataset width is {len(dataset[0])}")


In [None]:
# torch.reshape returns a tensor with the requested shape; the results are not
# assigned here, so `a` and `b` themselves keep their original shapes.
a = torch.arange(4.)  # 1-D tensor holding 0., 1., 2., 3.
torch.reshape(a, (2, 2))  # the same four values arranged as 2 rows x 2 columns
b = torch.tensor([[0, 1], [2, 3]])
torch.reshape(b, (-1,))  # -1 lets torch infer the length: a flat tensor of 4 elements

In [None]:
# The same 2 x 2 reshape of `a`, evaluated as the only expression of its cell.
torch.reshape(a, (2,2))

To assess the quality of the trained MLP model, you can use the test dataset to evaluate the model's performance on unseen data. One way to do this is to iterate over the test data, making predictions with the model and comparing the predictions to the true labels. You can then calculate various performance metrics, such as the mean squared error (MSE) or the mean absolute error (MAE), to assess the quality of the model's predictions.

Here is an example of how you might evaluate the trained MLP model using the test dataset:



In [None]:
# Collect the predictions and the true values of the whole test set, batch by batch.
predicted_batches = []
true_batches = []

# Evaluation needs no gradients; no_grad() avoids building the autograd graph.
with torch.no_grad():
    for inputs, labels in test_data_loader:
        # Give the model float input of shape (batch, input_size) whether the
        # loader yields 1-D or 2-D batches.
        inputs = inputs.float().reshape(inputs.shape[0], -1)
        outputs = mlp(inputs)

        # Flatten both sides so predictions and targets line up element by
        # element instead of broadcasting (N, 1) against (N,).
        predicted_batches.append(outputs.reshape(-1))
        true_batches.append(labels.float().reshape(-1))

# Concatenate tensors directly rather than rebuilding them from Python lists.
predicted_values = torch.cat(predicted_batches)
true_values = torch.cat(true_batches)
errors = predicted_values - true_values

# calculate the mean squared error
mse = torch.mean(errors ** 2)
print(f"Mean squared error: {mse}")

# calculate the mean absolute error
mae = torch.mean(torch.abs(errors))
print(f"Mean absolute error: {mae}")


## PyTorch - MNIST

In [None]:
import torch 
import torch.nn as nn # All the NN modules (ex. nn.Linear & loss functions)
import torch.optim as optim # SGD, Adam
import torch.nn.functional as F # Functions without parameters (activations)
from torch.utils.data import DataLoader # dataset management w minibatch mgmt
import torchvision.datasets as datasets # built-in datasets
import torchvision.transforms as transforms # transformations on our dataset

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Creating our fully connected network

In [None]:
class NN(nn.Module):
    """Fully connected classifier with a single hidden ReLU layer.

    Args:
        input_size: number of features in each (flattened) input sample.
        num_classes: number of output scores, one per class.
        hidden_size: number of units in the hidden layer. Defaults to 25,
            the width that used to be hard-coded.
    """

    def __init__(self, input_size, num_classes, hidden_size=25):
        super().__init__()  # initialise the nn.Module machinery
        self.fc1 = nn.Linear(input_size, hidden_size)  # input -> hidden
        self.fc2 = nn.Linear(hidden_size, num_classes)  # hidden -> class scores

    def forward(self, x):
        """Return one raw score (logit) per class for each row of ``x``.

        No softmax is applied here.
        """
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x



In [None]:
# Sanity check: push a random batch through a fresh model and look at the output shape.
x = torch.randn(32, 784) # batch of 32 samples, each a flattened 28x28 image (784 values)
model = NN(784, 10) # 10 output classes
print(model(x).shape) # expected torch.Size([32, 10]): one raw score (logit) per class for each sample; NN.forward applies no softmax, so these are not probabilities


## Hyperparameters

In [None]:
input_size = 784  # values per flattened 28x28 image
batch_size = 32  # samples per mini-batch handed out by the data loaders
learning_rate = 0.001  # step size passed to the optimizer
num_classes = 10  # number of output classes
num_epochs = 2  # full passes over the training set

In [None]:
# Load MNIST (downloaded into dataset/ when missing); ToTensor() turns each
# image into a tensor.
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
# Evaluation gains nothing from shuffling; a fixed order keeps per-batch
# results reproducible from run to run.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


## Network initialization


In [None]:
# Build the classifier from the hyperparameters and move its parameters to the selected device.
model = NN(input_size=input_size, num_classes=num_classes).to(device)

Loss function & optimizer

In [None]:
# Cross-entropy loss on the model's class scores, minimised with Adam at the
# configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

Training the network

In [None]:
# Train the classifier for num_epochs passes over the training set.
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the batch to the device the model lives on.
        data = data.to(device=device)
        targets = targets.to(device=device)

        # The model is fully connected: flatten every image in the batch to
        # a single row of features.
        data = data.reshape(data.shape[0], -1)

        # Forward pass and loss.
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass: clear old gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


# RNN from Scratch

In [None]:
import numpy as np
from numpy.random import randn

In [None]:
class RNN:
  '''
  A many-to-one vanilla recurrent neural network.

  The hidden state is updated as h_t = tanh(Wxh @ x_t + Whh @ h_{t-1} + bh)
  and a single output y = Why @ h_n + by is produced after the last input.
  '''

  def __init__(self, input_size, output_size, hidden_size=64):
    # Weights, drawn from a standard normal and scaled down by 1000.
    self.Whh = randn(hidden_size, hidden_size) / 1000
    self.Wxh = randn(hidden_size, input_size) / 1000
    self.Why = randn(output_size, hidden_size) / 1000

    # Biases
    self.bh = np.zeros((hidden_size, 1))
    self.by = np.zeros((output_size, 1))

  def forward(self, inputs):
    '''
    Perform a forward pass of the RNN using the given inputs.
    Returns the final output and hidden state.
    - inputs is a sequence of column vectors, each of shape (input_size, 1).
    The inputs and every hidden state are cached for backprop().
    '''
    h = np.zeros((self.Whh.shape[0], 1))

    self.last_inputs = inputs
    self.last_hs = { 0: h }

    # Perform each step of the RNN; last_hs[i + 1] is the state after input i.
    for i, x in enumerate(inputs):
      h = np.tanh(self.Wxh @ x + self.Whh @ h + self.bh)
      self.last_hs[i + 1] = h

    # Compute the output from the final hidden state.
    y = self.Why @ h + self.by

    return y, h

  def backprop(self, d_y, learn_rate=2e-2):
    '''
    Perform a backward pass of the RNN and update the weights in place.
    Must be called after forward(), whose cached inputs and states it uses.
    - d_y (dL/dy) has shape (output_size, 1). It is not modified.
    - learn_rate is a float.
    '''
    n = len(self.last_inputs)

    # Calculate dL/dWhy and dL/dby. d_by is a copy so that the in-place
    # clipping below cannot alter the caller's d_y.
    d_Why = d_y @ self.last_hs[n].T
    d_by = d_y.copy()

    # Initialize dL/dWhh, dL/dWxh, and dL/dbh to zero.
    d_Whh = np.zeros(self.Whh.shape)
    d_Wxh = np.zeros(self.Wxh.shape)
    d_bh = np.zeros(self.bh.shape)

    # Calculate dL/dh for the last h.
    # dL/dh = Why^T @ dL/dy
    d_h = self.Why.T @ d_y

    # Backpropagate through time.
    for t in reversed(range(n)):
      # Gradient at the pre-activation of step t: dL/dh * (1 - h^2)
      temp = ((1 - self.last_hs[t + 1] ** 2) * d_h)

      # dL/db = dL/dh * (1 - h^2)
      d_bh += temp

      # dL/dWhh = dL/dh * (1 - h^2) * h_{t-1}
      d_Whh += temp @ self.last_hs[t].T

      # dL/dWxh = dL/dh * (1 - h^2) * x
      d_Wxh += temp @ self.last_inputs[t].T

      # Next dL/dh = Whh^T @ (dL/dh * (1 - h^2)). The transpose is required:
      # h_t depends on h_{t-1} through Whh @ h_{t-1}.
      d_h = self.Whh.T @ temp

    # Clip to prevent exploding gradients.
    for d in [d_Wxh, d_Whh, d_Why, d_bh, d_by]:
      np.clip(d, -1, 1, out=d)

    # Update weights and biases using gradient descent.
    self.Whh -= learn_rate * d_Whh
    self.Wxh -= learn_rate * d_Wxh
    self.Why -= learn_rate * d_Why
    self.bh -= learn_rate * d_bh
    self.by -= learn_rate * d_by


In [None]:
train_data = {
  'good': True,
  'bad': False,
  'happy': True,
  'sad': False,
  'not good': False,
  'not bad': True,
  'not happy': False,
  'not sad': True,
  'very good': True,
  'very bad': False,
  'very happy': True,
  'very sad': False,
  'i am happy': True,
  'this is good': True,
  'i am bad': False,
  'this is bad': False,
  'i am sad': False,
  'this is sad': False,
  'i am not happy': False,
  'this is not good': False,
  'i am not bad': True,
  'this is not sad': True,
  'i am very happy': True,
  'this is very good': True,
  'i am very bad': False,
  'this is very sad': False,
  'this is very happy': True,
  'i am good not bad': True,
  'this is good not bad': True,
  'i am bad not good': False,
  'i am good and happy': True,
  'this is not good and not happy': False,
  'i am not at all good': False,
  'i am not at all bad': True,
  'i am not at all happy': False,
  'this is not at all sad': True,
  'this is not at all happy': False,
  'i am good right now': True,
  'i am bad right now': False,
  'this is bad right now': False,
  'i am sad right now': False,
  'i was good earlier': True,
  'i was happy earlier': True,
  'i was bad earlier': False,
  'i was sad earlier': False,
  'i am very bad right now': False,
  'this is very good right now': True,
  'this is very sad right now': False,
  'this was bad earlier': False,
  'this was very good earlier': True,
  'this was very bad earlier': False,
  'this was very happy earlier': True,
  'this was very sad earlier': False,
  'i was good and not bad earlier': True,
  'i was not good and not happy earlier': False,
  'i am not at all bad or sad right now': True,
  'i am not at all good or happy right now': False,
  'this was not happy and not good earlier': False,
}

test_data = {
  'this is happy': True,
  'i am good': True,
  'this is not happy': False,
  'i am not good': False,
  'this is not bad': True,
  'i am not sad': True,
  'i am very good': True,
  'this is very bad': False,
  'i am very sad': False,
  'this is bad not good': False,
  'this is good and happy': True,
  'i am not good and not happy': False,
  'i am not at all sad': True,
  'this is not at all good': False,
  'this is not at all bad': True,
  'this is good right now': True,
  'this is sad right now': False,
  'this is very bad right now': False,
  'this was good earlier': True,
  'i was not happy and not good earlier': False,
}

In [None]:
import numpy as np
import random

# from rnn import RNN
# from data import train_data, test_data

# Create the vocabulary from every word in the training texts. Sorting makes
# the word -> index mapping identical on every run; iterating a set of
# strings directly follows a hash order that can change between processes.
vocab = sorted({w for text in train_data for w in text.split(' ')})
vocab_size = len(vocab)
print('%d unique words found' % vocab_size)

# Assign indices to each word: a word's index is its position in vocab.
word_to_idx = {w: i for i, w in enumerate(vocab)}
idx_to_word = dict(enumerate(vocab))

def createInputs(text):
  '''
  Encodes the words of a text string as a list of one-hot column vectors.
  - text is a string whose words are separated by single spaces
  - Each vector has shape (vocab_size, 1), with a 1 at the word's index
  '''
  def one_hot(word):
    # All zeros except for the row given by the word's vocabulary index.
    vec = np.zeros((vocab_size, 1))
    vec[word_to_idx[word]] = 1
    return vec

  return [one_hot(word) for word in text.split(' ')]

def softmax(xs):
  '''
  Applies the Softmax Function to the input array along its first axis.
  - xs is an array (or nested list) of scores
  - The result has the same shape as xs and sums to 1 along the first axis
  '''
  xs = np.asarray(xs)
  if xs.size == 0:
    # Nothing to normalise.
    return np.exp(xs)

  # Subtracting the maximum leaves the result unchanged mathematically but
  # keeps np.exp from overflowing to inf (and the ratio from becoming nan).
  exps = np.exp(xs - np.max(xs, axis=0))
  return exps / np.sum(exps, axis=0)

# Initialize our RNN: one-hot inputs of length vocab_size and two outputs,
# which processData indexes with int(label) (0 for False, 1 for True).
rnn = RNN(vocab_size, 2)

def processData(data, backprop=True):
  '''
  Returns the RNN's loss and accuracy for the given data.
  - data is a dictionary mapping text to True or False.
  - backprop determines if the backward phase should be run.
  The items are visited in random order. Both returned values are Python
  floats: the mean cross-entropy loss and the fraction classified correctly.
  '''
  items = list(data.items())
  random.shuffle(items)

  loss = 0.0
  num_correct = 0

  for x, y in items:
    inputs = createInputs(x)
    target = int(y)

    # Forward
    out, _ = rnn.forward(inputs)
    probs = softmax(out)

    # Calculate loss / accuracy. probs[target] is a one-element array, so
    # take its scalar explicitly: the loss then stays a plain float instead
    # of an array that has to be converted implicitly when it is formatted.
    loss -= float(np.log(probs[target]).item())
    num_correct += int(np.argmax(probs) == target)

    if backprop:
      # Build dL/dy for softmax + cross-entropy: probs minus the one-hot
      # target. Work on a copy so probs itself is left intact.
      d_L_d_y = probs.copy()
      d_L_d_y[target] -= 1

      # Backward
      rnn.backprop(d_L_d_y)

  return loss / len(data), num_correct / len(data)

# Train for 1000 epochs; report train and test metrics after every 100th one.
for epoch in range(1000):
  train_loss, train_acc = processData(train_data)

  # Skip the report unless this is epoch 100, 200, ...
  if (epoch + 1) % 100 != 0:
    continue

  print('--- Epoch %d' % (epoch + 1))
  print('Train:\tLoss %.3f | Accuracy: %.3f' % (train_loss, train_acc))

  # Evaluate on the test set without running the backward phase.
  test_loss, test_acc = processData(test_data, backprop=False)
  print('Test:\tLoss %.3f | Accuracy: %.3f' % (test_loss, test_acc))