# Deep Learning Tests

## 1. Linear Regression
https://d2l.ai/chapter_linear-networks/linear-regression-scratch.html

### 1.1. Linear regression from scratch in NumPy

In [1]:
import numpy as np
import random

# Define the true weights and bias of the model
w_true = np.array([2, -3.4])
b_true = 4.2

# Create a single seeded random generator shared by every random draw below,
# so that restarting the kernel and running the cell again reproduces the same
# data, the same initialization and the same shuffling
seed = 0
rng = np.random.default_rng(seed)

# Generate inputs, sampled from a standard normal distribution
number_examples = 1000
number_features = len(w_true)
X = rng.normal(0, 1, (number_examples, number_features))

# Derive the outputs, with some noise
y = np.matmul(X, w_true) + b_true + rng.normal(0, 0.01, number_examples)

# Define the parameters for the training
number_epochs = 3
batch_size = 10
lr = 0.03

# Initialize the weights and bias to recover
w = rng.normal(0, 1, number_features)
b = 0.0

# Initialize an array for the mean loss over the minibatches of every epoch
epoch_loss = np.zeros(number_epochs)

# Loop over the epochs
for i in range(number_epochs):

    # Draw a fresh random order of all the example indices
    # (a NumPy permutation rather than the stdlib random.shuffle, which is
    # not designed for NumPy arrays and ignores the generator above)
    example_indices = rng.permutation(number_examples)

    # Initialize a list for the mean loss over the examples of every minibatch
    batch_loss = []

    # Loop over the examples in batches
    for j in range(0, number_examples, batch_size):

        # Get the indices of the (randomized) examples for one minibatch
        # (slicing stops at the end of the array, so the last batch may be shorter)
        batch_indices = example_indices[j:j+batch_size]

        # Get the inputs and outputs for the current minibatch
        X_batch = X[batch_indices, :]
        y_batch = y[batch_indices]

        # Compute the predicted outputs and their error
        y_hat = np.matmul(X_batch, w) + b
        error = y_hat - y_batch

        # Compute the loss between the predicted and true outputs
        l = np.mean(0.5*np.power(error, 2))

        # Save the mean loss for the current minibatch
        batch_loss.append(l)

        # Update the weights and bias using stochastic gradient descent
        # (the gradients of the mean squared loss are the mean of X*error and of error)
        w = w - lr*np.mean(X_batch*error[:, np.newaxis], axis=0)
        b = b - lr*np.mean(error, axis=0)

    # Update the mean loss for the current epoch
    epoch_loss[i] = np.mean(batch_loss)

    # Print the progress
    print(f'{i+1}/{number_epochs}: {epoch_loss[i]}')

1/3: 2.877929794325924
2/3: 0.00457104134012375
3/3: 5.3823265050773114e-05


### 1.2. Linear regression from scratch in PyTorch

In [18]:
import torch
import random

# Ground-truth parameters that the training below should recover
w_true = torch.tensor([2, -3.4])
b_true = 4.2

# Dataset dimensions: one feature per true weight
number_features = len(w_true)
number_examples = 1000

# Inputs: every feature is sampled from a standard normal distribution
X = torch.normal(0, 1, (number_examples, number_features))

# Outputs: the linear model plus Gaussian noise with a standard deviation of 0.01
# (torch.normal takes the output shape as a sequence of integers, hence the one-element list)
noise = torch.normal(0, 0.01, [number_examples])
y = X @ w_true + b_true + noise

# Define a function to read the dataset in random batches
def batch(X, y, batch_size):
    """Yield (inputs, outputs) minibatches that cover the dataset once, in random order.

    The example indices are shuffled with the stdlib ``random`` module on every
    call, then consumed in consecutive chunks of ``batch_size``; the final chunk
    is shorter when the number of examples is not a multiple of ``batch_size``.

    Args:
        X: inputs, indexed along their first dimension (one row per example).
        y: outputs, indexed with the same indices as ``X``.
        batch_size: number of examples per minibatch.

    Yields:
        A tuple ``(X[indices], y[indices])`` for each minibatch.
    """
    total = X.shape[0]

    # Random visiting order of the examples for this pass
    order = list(range(total))
    random.shuffle(order)

    for start in range(0, total, batch_size):
        # Slicing stops at the end of the list, so no explicit upper bound is needed
        chosen = order[start:start + batch_size]
        yield X[chosen], y[chosen]

# Training hyperparameters
number_epochs = 3
batch_size = 10
lr = 0.03

# Parameters to recover: random weights and a zero bias, both tracked by autograd
w = torch.normal(0, 1, [number_features], requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# One slot per epoch for the loss averaged over that epoch's minibatches
epoch_loss = torch.zeros(number_epochs)

# Loop over the epochs
for i in range(number_epochs):

    # Mean losses of the minibatches seen during this epoch
    batch_loss = []

    # Loop over the examples in batches
    for X_batch, y_batch in batch(X, y, batch_size):

        # Predict the outputs and compute the squared loss of every example
        y_hat = torch.matmul(X_batch, w) + b
        example_losses = 0.5*(y_hat-y_batch)**2

        # Backpropagate the summed loss; the division by the batch size is
        # applied in the update below, which turns the sum into a mean
        example_losses.sum().backward()

        # Gradient tracking is switched off while recording the loss and updating
        with torch.no_grad():

            # Save the mean loss for the current minibatch
            batch_loss.append(example_losses.mean())

            # Stochastic gradient descent step on each parameter
            for parameter in (w, b):

                # In-place update, so the parameter stays the same tensor that autograd tracks
                parameter -= lr*parameter.grad/len(example_losses)

                # Reset the gradient, which would otherwise accumulate over the minibatches
                parameter.grad.zero_()

    # Update the mean loss for the current epoch
    epoch_loss[i] = sum(batch_loss)/len(batch_loss)

    # Print the progress
    print(f'{i+1}/{number_epochs}: {epoch_loss[i]}')

1/3: 4.321141719818115
2/3: 0.014720503240823746
3/3: 0.00010053945879917592


### 1.3. Linear regression using APIs in PyTorch

In [38]:
import torch
from torch.utils import data
from torch import nn

# Ground-truth parameters that the training below should recover
w_true = torch.tensor([2, -3.4])
b_true = 4.2

# Dataset dimensions: one feature per true weight
number_features = len(w_true)
number_examples = 1000

# Inputs: every feature is sampled from a standard normal distribution
X = torch.normal(0, 1, (number_examples, number_features))

# Outputs: the linear model plus Gaussian noise with a standard deviation of 0.01
# (torch.normal takes the output shape as a sequence of integers, hence the one-element list)
noise = torch.normal(0, 0.01, [number_examples])
y = X @ w_true + b_true + noise

# Define a function to read the dataset in random batches
def batch(X, y, batch_size):
    """Return a PyTorch DataLoader serving shuffled (inputs, outputs) minibatches.

    Args:
        X: tensor of inputs, one example along the first dimension.
        y: tensor of outputs, with the same first dimension as ``X``.
        batch_size: number of examples per minibatch.

    Returns:
        A ``DataLoader`` over the paired tensors; it reshuffles the examples
        each time it is iterated.
    """
    # Pair each input row with its output so that both are indexed together
    paired_examples = data.TensorDataset(X, y)
    return data.DataLoader(paired_examples, batch_size=batch_size, shuffle=True)

# Define the model
# - Sequential class: defines container to chain several layers together
# - Linear class: defines fully-connected layer (with input and output feature dimensions)
model = nn.Sequential(nn.Linear(number_features, 1))

# Initialize the parameters
# - model[0]: accessing first layer in the model
# - weight.data and bias.data to access the parameters
model[0].weight.data.normal_(0, 0.01)
model[0].bias.data.fill_(0)

# Define the parameters for the training
number_epochs = 3
batch_size = 10
lr = 0.03

# Define the loss function (mean squared error, without the 0.5 factor)
loss = nn.MSELoss()

# Define the optimization algorithm (stochastic gradient descent)
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

# Initialize an array for the mean loss over the minibatches of every epoch
epoch_loss = torch.zeros(number_epochs)

# Loop over the epochs
for i in range(number_epochs):

    # Initialize a list for the mean loss over the examples of every minibatch
    batch_loss = []

    # Loop over the examples in batches
    for X_batch, y_batch in batch(X, y, batch_size):

        # Compute the predicted outputs (one column per example)
        y_hat = model(X_batch)

        # Compute the loss between the predicted and true outputs
        # (the outputs get a trailing dimension so that their shape matches y_hat)
        l = loss(y_hat, y_batch[:, None])

        # Save the loss for the current minibatch, detached from the autograd graph:
        # storing the attached tensor would keep the graph of every minibatch alive
        # and would make epoch_loss itself part of the graph
        batch_loss.append(l.detach())

        # Set the gradients to zero, as they otherwise accumulate over the backward passes
        optimizer.zero_grad()

        # Compute the gradients (MSELoss already averages over the minibatch, so l is a scalar)
        l.backward()

        # Perform a single parameter update
        optimizer.step()

    # Update the mean loss for the current epoch
    epoch_loss[i] = sum(batch_loss)/len(batch_loss)

    # Print the progress
    print(f'{i+1}/{number_epochs}: {epoch_loss[i]}')

1/3: 2.891758441925049
2/3: 0.00010611279139993712
3/3: 9.781956759979948e-05


### 1.4. Linear regression using higher-level APIs in Keras

In [3]:
# TODO: Check Keras and print the predicted weights and bias in every section

In [6]:
import numpy as np
import tensorflow as tf

# Define the true weights and bias of the model
w_true = np.array([2, -3.4])
b_true = 4.2

# Create one random generator shared by the draws below
rng = np.random.default_rng()

# Generate inputs, sampled from a standard normal distribution
number_examples = 1000
number_features = len(w_true)
X = rng.normal(0, 1, (number_examples, number_features))

# Derive the outputs, with some noise
y = np.matmul(X, w_true) + b_true + rng.normal(0, 0.01, number_examples)

# Define the parameters for the training
number_epochs = 3
batch_size = 10
lr = 0.03

# Define the model (as a feedforward NN)
model = tf.keras.Sequential()

# Add an input with the number of features
# (the shape is passed as a tuple, which is the documented form; a bare integer is not accepted by every Keras version)
model.add(tf.keras.Input(shape=(number_features,)))

# Add a densely-connected NN layer without activation and with initialized weights and bias
model.add(tf.keras.layers.Dense(
    1,
    activation=None,
    kernel_initializer=tf.keras.initializers.RandomNormal(mean=0, stddev=0.01),
    bias_initializer='zeros',
))

# Configure the model for training with stochastic gradient descent optimizer and mean squared error loss
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr),
              loss='mean_squared_error')

# Train the model given the batch size and number of epochs
model.fit(x=X, y=y, batch_size=batch_size, epochs=number_epochs, verbose=1)

# Read the trained parameters once: the kernel has shape (number_features, 1), the bias has shape (1,)
kernel, bias = model.get_weights()

# Print the predicted weights and bias
print("")
print(f"Predicted weights: {kernel[:, 0]}")
print(f"Predicted bias: {bias[0]}")

Epoch 1/3
Epoch 2/3
Epoch 3/3

Predicted weights: [ 1.9997882 -3.3989787]
Predicted bias: 4.199860572814941
