# Deep Learning Tests

## 1. Linear Regression
https://d2l.ai/chapter_linear-networks/linear-regression.html

### 1.1. Linear regression from scratch in NumPy

In [1]:
import numpy as np
import random

# Define the true weights and bias of the model
w_true = np.array([2, -3.4])
b_true = 4.2

# Generate inputs, sampled from a standard normal distribution
number_examples = 1000
number_features = len(w_true)
X = np.random.default_rng().normal(0, 1, (number_examples, number_features))

# Derive the outputs, with some noise
y = np.matmul(X, w_true)+b_true+np.random.default_rng().normal(0, 0.01, number_examples)

# Define the parameters for the training
number_epochs = 3
batch_size = 10
lr = 0.03

# Initialize the weights and bias to recover
w = np.random.default_rng().normal(0, 1, number_features)
b = 0

# Initialize a list for the mean loss over the minibatches of every epoch
epoch_loss = np.zeros(number_epochs)

# Loop over the epochs
for i in range(number_epochs):
    
    # Generate the indices for all the examples and shuffle them
    example_indices = np.arange(number_examples)
    random.shuffle(example_indices)
    
    # Initialize a list for the mean loss over the examples of every minbatch
    batch_loss = []
    
    # Loop over the examples in batches
    for j in np.arange(0, number_examples, batch_size):
        
        # Get the indices of the (randomized) examples for one minibatch
        batch_indices = example_indices[j:min(j+batch_size, number_examples)]
        
        # Get the inputs and outputs for the current minibatch
        X_batch = X[batch_indices, :]
        y_batch = y[batch_indices]
        
        # Compute the predicted outputs
        y_hat = np.matmul(X_batch, w) + b
        
        # Compute the loss between the predicted and true outputs
        l = np.mean(0.5*np.power(y_hat-y_batch, 2))
        
        # Save the loss for the current minibatch
        batch_loss.append(l)
        
        # Update the weights and bias using stochastic gradient descent
        w = w - lr*np.mean(X_batch*(y_hat-y_batch)[:, np.newaxis], axis=0)
        b = b - lr*np.mean(y_hat-y_batch, axis=0)
        
    # Update the mean loss for the current epoch
    epoch_loss[i] = np.mean(batch_loss)
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: {epoch_loss[i]}')
    
# Print the predicted weights and bias
print('')
print(f'w = {w}')
print(f'b = {b}')

1/3: 3.9596817087120337
2/3: 0.008516841412285217
3/3: 7.255705744279113e-05

w = [ 1.99930911 -3.39979572]
b = 4.199669463443418


### 1.2. Linear regression from scratch in PyTorch

In [1]:
import torch
import random

# Define the true weights and bias of the model
w_true = torch.tensor([2, -3.4])
b_true = 4.2

# Generate inputs, sampled from a standard normal distribution
number_examples = 1000
number_features = len(w_true)
X = torch.normal(0, 1, (number_examples, number_features))

# Derive the outputs, with some noise
y = torch.matmul(X, w_true)+b_true+torch.normal(0, 0.01, [number_examples]) # [number_examples]?

# Define a function to read the dataset in random batches
def batch(X, y, batch_size):
    
    # Generate the indices for all the examples and shuffle them
    number_examples = X.shape[0]
    example_indices = list(range(number_examples))
    random.shuffle(example_indices)
    
    # Loop over the examples in batches
    for i in range(0, number_examples, batch_size):
        
        # Get the indices of the (randomized) examples for one minibatch
        batch_indices = example_indices[i:min(i+batch_size, number_examples)]
        
        # Return the input and output minibatch and continue the iteration in the function
        yield X[batch_indices], y[batch_indices]

# Define the parameters for the training
number_epochs = 3
batch_size = 10
lr = 0.03

# Initialize the weights and bias to recover, requiring the gradients to be computed
w = torch.normal(0, 1, [number_features], requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# Initialize an array for the mean loss over the minibatches of every epoch
epoch_loss = torch.zeros(number_epochs)
        
# Loop over the epochs
for i in range(number_epochs):
    
    # Initialize a list for the mean loss over the examples of every minibatch
    batch_loss = []
    
    # Loop over the examples in batches
    for X_batch, y_batch in batch(X, y, batch_size):
        
        # Compute the predicted outputs
        y_hat = torch.matmul(X_batch, w) + b
        
        # Compute the loss between the predicted and true outputs
        l = 0.5*(y_hat-y_batch)**2
        
        # Compute the gradient on l with respect to w and b
        # (sum and not mean as the gradients will be divided by the batch size during the SGD)
        l.sum().backward()
        
        # Temporarily sets all of the requires_grad flags to false
        with torch.no_grad():
            
            # Save the mean loss for the current minibatch
            batch_loss.append(l.mean())
            
            # Update the weights and bias using stochastic gradient descent
            # (use augmented assignments to avoid modifying existing variables)
            w -= lr*w.grad/len(l)
            b -= lr*b.grad/len(l)
            
            # Set the gradients to zeros to avoid accumulating gradients
            w.grad.zero_()
            b.grad.zero_()
            
    # Update the mean loss for the current epoch
    epoch_loss[i] = sum(batch_loss)/len(batch_loss)
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: {epoch_loss[i]}')
    
# Print the predicted weights and bias
print('')
print(f'w = {w}')
print(f'b = {b}')

1/3: 3.427608013153076
2/3: 0.008017840795218945
3/3: 6.859275163151324e-05

w = tensor([ 1.9993, -3.3998], requires_grad=True)
b = tensor([4.2002], requires_grad=True)


### 1.3. Linear regression using APIs in PyTorch

In [16]:
import torch
from torch.utils import data
from torch import nn

# Define the true weights and bias of the model
w_true = torch.tensor([2, -3.4])
b_true = 4.2

# Generate inputs and derive outputs
number_examples = 1000
number_features = len(w_true)
X = torch.normal(0, 1, (number_examples, number_features))
y = torch.matmul(X, w_true)+b_true+torch.normal(0, 0.01, [number_examples]) # [number_examples]?

# Define a function to read the dataset in random batches
def batch(X, y, batch_size):
    
    # Construct a PyTorch data iterator (?)
    data_set = data.TensorDataset(*(X, y))
    return data.DataLoader(data_set, batch_size, shuffle=True)

# Define the parameters for the training
number_epochs = 3
batch_size = 10
lr = 0.03

# Define the model (as a stack of layers) and add a fully-connected layer
model = nn.Sequential(nn.Linear(number_features, 1))

# Initialize the parameters
model[0].weight.data.normal_(0, 0.01)
model[0].bias.data.fill_(0)

# Define the loss function (mean squared error, without the 0.5 factor)
loss = nn.MSELoss()

# Define the optimization algorithm (stochastic gradient descent)
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

# Initialize an array for the mean loss over the minibatches of every epoch
epoch_loss = torch.zeros(number_epochs)

# Loop over the epochs
for i in range(number_epochs):
    
    # Initialize a list for the mean loss over the examples of every minibatch
    batch_loss = []
    
    # Loop over the examples in batches
    for X_batch, y_batch in batch(X, y, batch_size):
        
        # Compute the predicted outputs
        y_hat = model(X_batch)
        
        # Compute the loss between the predicted and true outputs
        l = loss(y_hat, y_batch[:, None])
        
        # Save the loss for the current minibatch (no with torch.no_grad())
        batch_loss.append(l)
        
        # Set the gradients to zero (.zero_grad()?)
        optimizer.zero_grad()
        
        # Computes the gradient (no .sum?)
        l.backward()
        
        # Performs a single parameter update
        optimizer.step()
        
    # Update the mean loss for the current epoch
    epoch_loss[i] = sum(batch_loss)/len(batch_loss)
        
    # Print the progress
    print(f'{i+1}/{number_epochs}: {epoch_loss[i]}')
    
# Print the predicted weights and bias
print('')
print(f'w = {model[0].weight.data}')
print(f'b = {model[0].bias.data}')

1/3: 2.8696396350860596
2/3: 0.00011544800509000197
3/3: 0.0001040133647620678

w = tensor([[ 2.0003, -3.3997]])
b = tensor([4.2010])


### 1.4. Linear regression using higher-level APIs in Keras

In [5]:
import numpy as np
import tensorflow as tf

# Define the true weights and bias of the model
w_true = np.array([2, -3.4])
b_true = 4.2

# Generate inputs and derive outputs
number_examples = 1000
number_features = len(w_true)
X = np.random.default_rng().normal(0, 1, (number_examples, number_features))
y = np.matmul(X, w_true)+b_true+np.random.default_rng().normal(0, 0.01, number_examples)

# Define the parameters for the training
number_epochs = 3
batch_size = 10
lr = 0.03

# Define the model (as a stack of layers) and add a densely-connected NN layer with initialized parameters
model = tf.keras.Sequential([tf.keras.layers.Dense(1, kernel_initializer=tf.initializers.RandomNormal(mean=0, stddev=0.01), \
                                                   bias_initializer='zeros')])

# Configure the model for training with stochastic gradient descent optimizer and mean squared error loss
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=lr),
              loss=tf.keras.losses.MeanSquaredError())

# Train the model given the batch size and number of epochs
model.fit(x=X, y=y, batch_size=batch_size, epochs=number_epochs, verbose=1)

# Print the predicted weights and bias
print('')
print(f'w = {model.get_weights()[0]}')
print(f'b = {model.get_weights()[1]}')

Epoch 1/3
Epoch 2/3
Epoch 3/3

w = [[ 1.9999057]
 [-3.3999813]]
b = [4.200842]


## 2. Softmax Regression
https://d2l.ai/chapter_linear-networks/softmax-regression.html

### 2.1. Softmax regression from scratch in NumPy

In [15]:
import numpy as np
import random
import tensorflow as tf

# Get the data as train and test inputs and outputs
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
number_train = len(X_train)
number_test = len(X_test)

# Flatten and normalize the inputs
input_size = np.size(X_train[0])
X_train = np.reshape(X_train, (number_train, input_size))/255
X_test = np.reshape(X_test, (number_test, input_size))/255

# Turn the outputs into one-hot vectors
output_size = 10
y_train1 = np.zeros((number_train, output_size))
y_test1 = np.zeros((number_test, output_size))
for i in range(number_train): y_train1[i, y_train[i]] = 1
for i in range(number_test): y_test1[i, y_test[i]] = 1

# Define the parameters for the training
number_epochs = 10
batch_size = 256
lr = 0.1

# Initialize the weights and bias to recover
W = np.random.default_rng().normal(0, 0.01, size=(input_size, output_size)) # 0.01?
b = np.zeros(output_size)

# Initialize an array for the mean loss over the minibatches of every epoch
epoch_loss = np.zeros(number_epochs)

# Loop over the epochs
for i in range(number_epochs):
    
    # Generate the indices for all the examples and shuffle them
    train_indices = np.arange(number_train)
    random.shuffle(train_indices)
    
    # Initialize a list for the mean loss over the examples of every minbatch
    batch_loss = []
    
    # Loop over the examples in batches
    for j in np.arange(0, number_train, batch_size):
        
        # Get the indices of the (randomized) examples for one minibatch
        batch_indices = train_indices[j:min(j+batch_size, number_train)]
        
        # Get the inputs and outputs for the current minibatch
        X_batch = X_train[batch_indices, :]
        y_batch = y_train1[batch_indices]
        
        # Compute the predicted outputs (logits)
        o = np.matmul(X_batch, W) + b
        
        # Compute the softmax of the logits (indirectly to avoid numerical stability issues)
        o = o-np.max(o, axis=1)[:, np.newaxis]
        o_exp = np.exp(o)
        y_hat = o_exp/np.sum(o_exp, axis=1)[:, np.newaxis]
        
        # Compute the mean cross-entropy loss over the minibatch
        l = np.mean(np.log(np.sum(o_exp, axis=1)-np.sum(y_batch*o, axis=1)))
        
        # Save the loss for the current minibatch
        batch_loss.append(l)
        
        # HERE!!!
        dl = y_hat-y_batch
        
        W = W-lr*
        
    # Update the mean loss for the current epoch
    epoch_loss[i] = np.mean(batch_loss)
    
    # Print the progress
    print(f'{i+1}/{number_epochs}: {epoch_loss[i]}')

1/10: 2.152900399157075
2/10: 2.1528876740472875
3/10: 2.152915281652977
4/10: 2.152878180835081
5/10: 2.1529124906719153
6/10: 2.152913458166309
7/10: 2.1528916592550185
8/10: 2.1529073294142425
9/10: 2.1528966850975335
10/10: 2.152916965646823



NameError: name 'w' is not defined

In [16]:
np.shape(X_train)

(60000, 784)

In [10]:
np.shape(o)

(256, 10)

### 2.2. Softmax regression from scratch in PyTorch

In [6]:
import torchvision
from torch.utils import data

# Get the dataset (convert the data to 32-bit floating point tensors in [0, 1])
fmnist_train = torchvision.datasets.FashionMNIST(
    root="data", train=True, transform=torchvision.transforms.ToTensor(), download=True)
fmnist_test = torchvision.datasets.FashionMNIST(
    root="data", train=False, transform=torchvision.transforms.ToTensor(), download=True)

# Define the labels
text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']

In [29]:
a = [i[1] for i in fmnist_train]

In [31]:
import mnist_reader
X_train, y_train = mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = mnist_reader.load_mnist('data/fashion', kind='t10k')

ModuleNotFoundError: No module named 'mnist_reader'

In [9]:

X, y = data.DataLoader(fmnist_train, batch_size=1, shuffle=True)

ValueError: too many values to unpack (expected 2)