# PyTorch tensors
Create a tensor of size 3x3 with any values. It should be created on the GPU if available.

In [None]:
import torch

# Pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Create the 3x3 tensor directly on the selected device. Without `device=`,
# torch.tensor allocates on the default device (the CPU) even when a GPU exists.
my_tensor = torch.tensor([[5, 6, 7], [1, 2, 3], [0, 9, 8]], device=device)

# print the tensor
print(my_tensor)

In [None]:
# Sanity-check the tensor built in the previous cell:
# it must live on a known device type and be 3x3
assert my_tensor.device.type in ("cuda", "cpu")
assert tuple(my_tensor.shape) == (3, 3)
print("Success!")

# Neural Net Constructor Kit `torch.nn`

The `torch.nn` module can be thought of as a constructor kit for neural networks. It provides the building blocks for creating NNs, including layers, activation functions, and more.

Create a 3-layer Multi-Layer Perceptron (MLP) neural network with the specs:
 - input layer: 784 neurons
 - hidden layer: 128 neurons
 - output layer: 10 neurons

Use the ReLU activation function for the hidden layer and the softmax activation function for the output layer. Print the neural network.

In [None]:
import torch.nn as nn

class MyMLP(nn.Module):
    """Multi-layer perceptron: 784 inputs -> 128 hidden units (ReLU) -> 10 outputs (softmax).

    NOTE(review): `forward` returns softmax probabilities because the exercise
    asks for a softmax output layer. `nn.CrossEntropyLoss` expects unnormalized
    logits, so if this model is trained with that loss, consider returning the
    raw `fc2` output instead - confirm against the exercise's intent.
    """

    def __init__(self):
        super().__init__()
        # Input -> hidden: 784 input features, 128 output features
        self.fc1 = nn.Linear(784, 128)
        # Hidden -> output: 128 input features, 10 output features
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        # dim=1 normalizes across the 10 outputs of each row of a 2-D (batch, 10) tensor
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax output of the network for a 2-D input `x`.

        Args:
            x: Tensor whose last dimension has 784 features; softmax over dim=1
               requires at least two dimensions, e.g. (batch, 784).

        Returns:
            Tensor with 10 values per row; each row sums to 1.
        """
        # Pass the input through the first (input -> hidden) layer
        x = self.fc1(x)
        # Apply the ReLU activation function
        x = self.relu(x)
        # Pass the result to the final layer
        x = self.fc2(x)
        # Apply the softmax activation function
        x = self.softmax(x)

        return x


my_mlp = MyMLP()
print(my_mlp)

In [None]:
# Layer sizes: 784 inputs into fc1, 10 outputs from fc2, 128 hidden nodes between them
assert my_mlp.fc1.in_features == 784
assert my_mlp.fc2.out_features == 10
assert my_mlp.fc1.out_features == 128

# Both my_mlp.fc1 and my_mlp.fc2 must be fully connected layers
assert all(isinstance(layer, nn.Linear) for layer in (my_mlp.fc1, my_mlp.fc2))

# Loss Functions and Optimizers
PyTorch comes with a number of built-in loss functions and optimizers that can be used to train NNs. Loss functions are implemented in the `torch.nn` module, and optimizers in the `torch.optim` module.

Task:
 - Create a loss function using the `torch.nn.CrossEntropyLoss` class.
 - Create an optimizer using the `torch.optim.SGD` class with a learning rate of 0.01.

In [None]:
import torch.optim as optim 

# Cross-entropy criterion used as the training loss
loss_fn = nn.CrossEntropyLoss()

# SGD over the MLP's parameters (by convention, the variable is named 'optimizer')
optimizer = optim.SGD(params=my_mlp.parameters(), lr=0.01)

# Check: both objects have the requested types and the optimizer is wired to the MLP
assert isinstance(loss_fn, nn.CrossEntropyLoss), "loss_fn should be a instance of CrossEntropyLoss"
assert isinstance(optimizer, optim.SGD), "optimizer should be a instance of SGD"
assert optimizer.defaults["lr"] == 0.01, "learning rate should be 0.01"
assert optimizer.param_groups[0]["params"] == list(
    my_mlp.parameters()
), "Optimizer should used passed MLP parameters"

# Training loops
Training loops are easy!

In [None]:
def fake_training_loaders():
    """Yield 30 batches of random training data as (inputs, labels) pairs.

    Each batch holds 64 samples: inputs are a (64, 784) tensor drawn from
    `torch.randn`, labels are 64 integers drawn from `torch.randint` in [0, 10).
    """
    batch_size = 64   # samples per batch
    n_features = 784  # features per sample
    n_classes = 10    # labels fall in [0, n_classes)
    n_batches = 30

    for _ in range(n_batches):
        inputs = torch.randn(batch_size, n_features)
        labels = torch.randint(0, n_classes, (batch_size,))
        yield inputs, labels

for epoch in range(3):
    # Training loop: one pass over all fake batches per epoch
    for i, (x, y) in enumerate(fake_training_loaders()):
        # Every batch is an (input, label) pair

        # Zero the gradients left over from the previous batch
        optimizer.zero_grad()

        # Forward pass (predictions)
        y_pred = my_mlp(x)

        # Calculate the loss and its gradients
        loss = loss_fn(y_pred, y)  # Compare prediction with the actual label
        loss.backward()

        # Update the weights exactly once per batch; a second step() would
        # apply the same gradients again
        optimizer.step()

        if i % 10 == 0:
            print(f"Epoch {epoch}, Batch {i}, Loss: {loss.item():.5f}")

# Check: with random labels over 10 classes the loss should stay near ln(10) ~= 2.3
assert abs(loss.item() - 2.3) < 0.1, "the loss should be around 2.3 with random data"

# Download a model from HuggingFace and use it for sentiment analysis
Hugging Face provides pre-trained models that can be used for a variety of tasks. This exercise uses the `distilbert-base-uncased-finetuned-sst-2-english` model to perform sentiment analysis on a movie review.

Task:
 - Instantiate an AutoModelForSequenceClassification model using the `distilbert-base-uncased-finetuned-sst-2-english` model.
 - Instantiate an AutoTokenizer using the model.
 - Define a function that will get a prediction.

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# The model and its tokenizer are both loaded from the same Hugging Face checkpoint.
#
# Optional `from_pretrained` arguments that are NOT passed here:
#   device_map="auto"  - automatically allocates the model weights to your fastest
#                        device first, which is typically the GPU.
#   torch_dtype="auto" - directly initializes the model weights in the data type
#                        they're stored in, which can help avoid loading the weights
#                        twice (PyTorch loads weights in torch.float32 by default).
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

pt_model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

def get_prediction(text):
    """Classify the sentiment of a single piece of text with the pre-trained model.

    Args:
        text: The text (e.g. a movie review) to classify.

    Returns:
        "positive" when the highest-scoring class index is 1, otherwise "negative".
    """
    # Truncate to the tokenizer's maximum length so that long reviews do not
    # exceed the input size the model supports
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: no gradients are needed, so skip autograd bookkeeping
    with torch.no_grad():
        outputs = pt_model(**inputs)

    predicted_class_id = torch.argmax(outputs.logits, dim=-1).item()
    return "positive" if predicted_class_id == 1 else "negative"

# print(get_prediction("This movie was great!")) << this crashes the kernel!

# Download a dataset from HuggingFace
Hugging Face provides a number of datasets that can be used for various tasks. Use the `imdb` dataset and pass it to the above model.

In [None]:
from datasets import load_dataset # may require pip install datasets

# Load only the test split of the IMDB dataset; the check in the next cell
# expects the result to be a single `Dataset` object
dataset = load_dataset("imdb", split="test")

In [None]:
# Check that the loaded object is a Dataset exposing exactly the expected columns
from pprint import pprint
from datasets import Dataset

assert isinstance(dataset, Dataset), "The dataset should be a Dataset object"
assert set(dataset.features) == {"label", "text"}, "The dataset should have a label and a text feature"

# Show the first example
pprint(dataset[0])

# Use the pre-trained model!
Make some predictions

In [None]:
# Fetch only the last 3 rows; slicing the dataset first avoids reading an
# entire column just to keep three items
last_rows = dataset[-3:]

# Get the last 3 reviews
reviews = last_rows["text"]

# Get the last 3 labels
labels = last_rows["label"]

# Check: print each review excerpt with its true label and the model's prediction
for review, label in zip(reviews, labels):
    prediction = get_prediction(review)

    print(f"Review: {review[:80]}\n... {review[-80:]}")
    # Same lowercase wording as get_prediction so label and prediction compare directly
    print(f'Label: {"positive" if label else "negative"}')
    print(f"Prediction: {prediction}\n")