# Neural Networks From Scratch

In [1]:
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [104]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))


def forward_propagation(X, weights, biases, layers):
    a = [X]
    z = []
    for i in range(len(layers)):
        z = np.dot(a[-1], weights[i]) + biases[i]
        a.append(sigmoid(z))
    return a, z

def backpropagation(X, y, layers, learning_rate, num_iterations):
    """
    Train a neural network with backpropagation algorithm.

    Parameters:
        X (numpy array): Input data, shape (num_samples, input_size)
        y (numpy array): Target data, shape (num_samples, output_size)
        layers (list): List of integers, specifying the number of neurons in each layer
        learning_rate (float): Learning rate for updating weights andbiases
        num_iterations  (int): Number of iterations for training

    Returns:
        tuple: A tuple of two lists, representing the learned weights and biases for each layer.
    """
    assert X.shape[0] == y.shape[0], "Number of samples in input and target data must be the same."

    # Initialize the weights and biases
    weights = [np.random.normal(0, 1, (X.shape[1], layers[0]))]
    biases = [np.zeros((1, layers[0]))]
    for i in range(1, len(layers)):
        weights.append(np.random.normal(0, 1, (layers[i-1], layers[i])))
        biases.append(np.zeros((1, layers[i])))

    # Backpropagation iterations
    for i in range(1, num_iterations + 1):
        a, z = forward_propagation(X, weights, biases, layers)

        # Compute the cost function
        error = a[-1] - y
        cost = np.mean(np.square(error))

        if i % 100 == 0:
            print("Iteration: {}, Cost: {}".format(i, cost))

        # Backward propagation
        delta = [error * sigmoid_derivative(z[-1])]
        for j in range(len(layers)-1, 0, -1):
            delta.append(np.dot(delta[-1], weights[j].T) * sigmoid_derivative(z[j-1]))

        delta.reverse()

        # Update the weights and biases
        for j in range(len(layers)):
            weights[j] -= learning_rate * np.dot(a[j].T, delta[j])
            biases[j] -= learning_rate * np.mean(delta[j], axis=0)


    return (weights, biases)


In [170]:
# Load a percentage split of the imdb dataset
dataset = load_dataset("imdb", split="train[:50%]")

# Load the pre-trained SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode the text in the dataset using the pre-trained model
X = np.array(dataset["text"])
y = np.array(dataset["label"])

y = y.reshape(-1, 1)

# Print the shapes of the encoded input data and target data
print("X shape:", X.shape)
print("y shape:", y.shape)

Using the latest cached version of the module from C:\Users\ayale\.cache\huggingface\modules\datasets_modules\datasets\imdb\d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0 (last modified on Thu Apr 20 23:14:26 2023) since it couldn't be found locally at imdb., or remotely on the Hugging Face Hub.
Found cached dataset imdb (C:/Users/ayale/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)


X shape: (12500,)
y shape: (12500, 1)


In [132]:
epochs = 3
layers = [10, 10, 1]
iterations = 1000
lr = 0.01

In [133]:
for epoch in range(1, epochs + 1):
    print("Beginning epoch {}".format(epoch))
    w, b = backpropagation(X, y, layers, lr, iterations)

Beginning epoch 1
Iteration: 100, Cost: 0.00011932187330181078
Iteration: 200, Cost: 5.9410640348254804e-05
Iteration: 300, Cost: 3.9517343051829024e-05
Iteration: 400, Cost: 2.9593978688064804e-05
Iteration: 500, Cost: 2.3649929734016053e-05
Iteration: 600, Cost: 1.96923344166693e-05
Iteration: 700, Cost: 1.6868405907751267e-05
Iteration: 800, Cost: 1.4752273401714378e-05
Iteration: 900, Cost: 1.3107584792194719e-05
Iteration: 1000, Cost: 1.1792653616781468e-05
Beginning epoch 2
Iteration: 100, Cost: 3.396153266828658e-07
Iteration: 200, Cost: 3.387013241676641e-07
Iteration: 300, Cost: 3.3779234249662493e-07
Iteration: 400, Cost: 3.3688834011890093e-07
Iteration: 500, Cost: 3.359892759424322e-07
Iteration: 600, Cost: 3.350951093275984e-07
Iteration: 700, Cost: 3.342058000810254e-07
Iteration: 800, Cost: 3.333213084494373e-07
Iteration: 900, Cost: 3.3244159511365286e-07
Iteration: 1000, Cost: 3.315666211826612e-07
Beginning epoch 3
Iteration: 100, Cost: 0.000219043329618154
Iteration:

In [137]:
a, z = forward_propagation(model.encode("probably the worst ever movie i've seen"), w, b, layers)
print(a[-1][0])

[0.00579483]


In [172]:
import torch
import torch.nn as nn

# Define the hyperparameters
embedding_size = 384  # Size of the word embeddings
hidden_size = 64  # Size of the hidden layer
output_size = 1  # Size of the output layer (binary classification)
learning_rate = 0.001  # Learning rate for the optimizer
batch_size = 32  # Number of samples in each batch
num_epochs = 10  # Number of epochs for training

# Define the neural network architecture

class SentenceTransformerModule(nn.Module):
    def __init__(self, model_name):
        super().__init__()
        self.model = SentenceTransformer(model_name)
    
    def forward(self, sentences):
        embeddings = self.model.encode(sentences)
        return torch.from_numpy(embeddings).float()


embeddings_model = SentenceTransformerModule('all-MiniLM-L6-v2')

class SentimentNet(nn.Module):
    def __init__(self, embedding_size, hidden_size, output_size):
        super(SentimentNet, self).__init__()
        self.embedding = embeddings_model
        self.fc1 = nn.Linear(embedding_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.embedding(x)
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.sigmoid(self.fc2(x))
        return x


# Create an instance of the neural network
model = SentimentNet(embedding_size, hidden_size, output_size)

# Define the loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Print the model summary
print(model)

# Convert the input data and target data to PyTorch tensors
X_train = X
y_train = torch.tensor(y).float()


SentimentNet(
  (embedding): SentenceTransformerModule(
    (model): SentenceTransformer(
      (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
      (2): Normalize()
    )
  )
  (fc1): Linear(in_features=384, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)


In [173]:
# Train the neural network
for epoch in range(num_epochs):
    for i in range(0, len(X_train), batch_size):
        # Forward pass
        inputs = X_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]
        outputs = model(inputs)

        # Compute the loss
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Compute the accuracy on the training set
    predictions = (outputs > 0.5).float()
    accuracy = torch.mean((predictions == targets).float())

    # Print the loss and accuracy for each epoch
    print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
          .format(epoch+1, num_epochs, loss.item(), accuracy*100))


Epoch [1/10], Loss: 0.0018, Accuracy: 100.00%
Epoch [2/10], Loss: 0.0004, Accuracy: 100.00%
Epoch [3/10], Loss: 0.0002, Accuracy: 100.00%
Epoch [4/10], Loss: 0.0001, Accuracy: 100.00%
Epoch [5/10], Loss: 0.0001, Accuracy: 100.00%
Epoch [6/10], Loss: 0.0000, Accuracy: 100.00%
Epoch [7/10], Loss: 0.0000, Accuracy: 100.00%
Epoch [8/10], Loss: 0.0000, Accuracy: 100.00%
Epoch [9/10], Loss: 0.0000, Accuracy: 100.00%
Epoch [10/10], Loss: 0.0000, Accuracy: 100.00%


In [177]:
model("best movie i've ever seen. tom croose's acting was amazing!")

tensor([1.4968e-05], grad_fn=<SigmoidBackward0>)