# MNIST con PyTorch

In [2]:
from sklearn.datasets import fetch_openml
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm

In [3]:
import torch
import torch.nn as nn

In [4]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

In [8]:
from sklearn.model_selection import train_test_split

## Conjunto de datos

In [5]:
# Fetch version 1 of the "mnist_784" dataset from OpenML, using ./data/ as
# the local download/cache directory.
mnist = fetch_openml(
    name="mnist_784",
    version=1,
    data_home="./data/",
)

In [16]:
# Class labels: the distinct values found in the target column.
# The printed output below shows they are the strings '0' through '9'.
clases = np.unique(mnist.target)
print(clases)

['0' '1' '2' '3' '4' '5' '6' '7' '8' '9']


In [47]:
# Side length of one image; lado * lado (= 784, the number of feature
# columns) is later used as the input size of the network.
lado = 28

In [17]:
# Convert the features and the targets returned by fetch_openml into
# NumPy arrays.
X_total, y_total = mnist.data.to_numpy(), mnist.target.to_numpy()

### Entrenamiento, validación y prueba

In [18]:
# Hold out 30% of the samples; the remaining 70% become the training set.
# A fixed random_state makes the shuffle (and therefore every result
# derived from it) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X_total, y_total, test_size=0.3, random_state=42
)

In [19]:
# Split the hold-out set again: 70% of it becomes the validation set and
# 30% the final test set. random_state keeps the split reproducible.
# NOTE: X_test / y_test are overwritten with a subset of themselves, so
# re-running this cell alone shrinks them further; re-run the previous
# split first.
X_val, X_test, y_val, y_test = train_test_split(
    X_test, y_test, test_size=0.3, random_state=42
)

In [20]:
# Report the shapes of the training features and of the training labels.
print(
    "X: ", X_train.shape,
    " Y:", y_train.shape,
)

X:  (49000, 784)  Y: (49000,)


### One hot

[OneHotEncoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html)

In [21]:
from sklearn.preprocessing import OneHotEncoder

In [29]:
# One-hot encoder configured to return a dense array (sparse_output=False).
# It is fitted on the class labels arranged as a single column.
enc = OneHotEncoder(sparse_output=False)
enc.fit(clases[:, np.newaxis])

In [31]:
# Example: encode the single label '3'; the result is one row with a 1 in
# the column that corresponds to that class.
enc.transform([['3']])

array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]])

In [32]:
# One-hot encode the training labels (passed to the encoder as one column).
Y_train = enc.transform(y_train[:, np.newaxis])

In [34]:
# Inspect the one-hot vector of the first training sample.
Y_train[0]

array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.])

## Red

In [35]:
# Run on the first CUDA device when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [36]:
class TwoSigmoidModel(nn.Module):
    """
    Neural network with one hidden layer that applies a sigmoid activation
    after both the hidden layer and the output layer.
    """

    def __init__(self, n_input = 2, n_hidden = 2, n_out = 1):
        """
        Create the two linear layers and store them as attributes.

        Args:
            n_input: number of input features.
            n_hidden: number of units in the hidden layer.
            n_out: number of output units.
        """
        super().__init__()
        # nn.Linear adds the bias term by default, so none is created by hand.
        self.z1 = nn.Linear(in_features=n_input, out_features=n_hidden)
        self.z2 = nn.Linear(in_features=n_hidden, out_features=n_out)

    def forward(self, input):
        """
        Compute the network output for a tensor of input data.

        The input goes through the first linear layer and a sigmoid, then
        through the second linear layer and another sigmoid.

        Note: PyTorch requires this method to be named ``forward``.

        Args:
            input: tensor whose last dimension has ``n_input`` entries.

        Returns:
            Tensor of sigmoid activations with ``n_out`` entries in its
            last dimension.
        """
        hidden = self.z1(input).sigmoid()
        return self.z2(hidden).sigmoid()

In [37]:
def show_w(axis, mat, name):
    """
    Draw a weight tensor as a colour-coded matrix on the given axes.

    The tensor is detached from the autograd graph and copied to the CPU
    before conversion to NumPy, so parameters that live on a GPU or that
    require gradients can be passed directly.

    Args:
        axis: axes-like object providing ``matshow`` and ``set_title``.
        mat: tensor to display.
        name: title placed above the plot.

    Returns:
        Whatever ``axis.matshow`` returns (usable for a colorbar).
    """
    cax = axis.matshow(mat.detach().cpu().numpy())
    axis.set_title(name)
    return cax
    
def show_b(axis, mat, name):
    """
    Draw a bias tensor as a single-column matrix on the given axes.

    The tensor is detached from the autograd graph and copied to the CPU
    before conversion to NumPy, so parameters that live on a GPU or that
    require gradients can be passed directly.

    Args:
        axis: axes-like object providing ``matshow`` and ``set_title``.
        mat: tensor to display; it is reshaped to one column.
        name: title placed above the plot.

    Returns:
        Whatever ``axis.matshow`` returns, mirroring ``show_w``.
    """
    cax = axis.matshow(mat.detach().cpu().numpy().reshape((-1, 1)))
    axis.set_title(name)
    return cax

In [38]:
def train(model, X, Y, learning_rate, num_steps=500):
    """
    Train a network with gradient descent and plot the outcome.

    Every step runs the whole of ``X`` through the model, measures the
    binary cross entropy against ``Y`` and applies one SGD update. After
    training, the loss curve and the first four parameter tensors of the
    model are plotted, and those parameters are printed.

    Args:
        model: neural network to train. Its first four parameters are
            treated as w1, b1, w2, b2 (as in ``TwoSigmoidModel``).
        X: input data as a PyTorch tensor.
        Y: desired outputs as a PyTorch tensor.
        learning_rate: step size passed to the SGD optimizer.
        num_steps: number of optimisation steps.

    Returns:
        None.
    """
    errores = np.zeros(num_steps)

    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    criterion = nn.BCELoss()              # Binary Cross Entropy
    for t in range(num_steps):
        # Forward pass: compute the predictions for the whole batch.
        y_pred = model(X)

        # Record the loss of this step for the error plot.
        loss = criterion(y_pred, Y)
        errores[t] = loss.item()

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Plot the error per iteration.
    plt.plot(np.arange(num_steps), errores)
    plt.title("Error")
    plt.ylabel("Entropía cruzada binaria")
    plt.xlabel("Iteración")

    # Plot the weights. The parameters are detached and copied to the CPU
    # unconditionally: comparing a torch.device with the string 'cpu' is not
    # a reliable test, and this way the function does not depend on a
    # notebook-level `device` variable.
    parameters = list(model.parameters())
    w1, b1, w2, b2 = (param.detach().cpu() for param in parameters[:4])
    fig, axes = plt.subplots(1, 4, figsize=(12, 1.5))
    cax = show_w(axes[0], w1, "w1")
    show_b(axes[1], b1, "b1")
    show_w(axes[2], w2, "w2")
    show_b(axes[3], b2, "b2")
    fig.colorbar(cax)
    print("w1 = ", w1, " b1 = ", b1, " w2 = ", w2, " b2 = ", b2)

In [43]:
# Check the element type of the training features; the output below shows
# int64, and the data is cast to float32 when the tensors are built.
X_train.dtype

dtype('int64')

In [48]:
# Build the network (784 inputs, 50 hidden units, 10 outputs) and move it to
# the selected device. `.to(device)` works for both CPU and GPU, whereas the
# previous `device != 'cpu'` test compared a torch.device with a string,
# was always true, and therefore called `.cuda()` even without a GPU.
model = TwoSigmoidModel(n_input = lado * lado, n_hidden = 50, n_out = 10)
model = model.to(device)

# Small batch with the first m training samples, cast to float32 and created
# directly on the same device as the model.
m = 10
X = torch.tensor(X_train[:m].astype(np.float32), device=device)
Y = torch.tensor(Y_train[:m].astype(np.float32), device=device)

In [49]:
# Call the module itself rather than `forward` directly, so that PyTorch
# runs any registered hooks; this also matches how `train` invokes the model.
Out = model(X)

In [50]:
# Display the outputs of the untrained network: one row per sample,
# one column per output unit.
Out

tensor([[0.5012, 0.4460, 0.5654, 0.4400, 0.4172, 0.6466, 0.3182, 0.5388, 0.5691,
         0.4314],
        [0.5177, 0.5725, 0.5194, 0.5297, 0.4976, 0.5098, 0.3846, 0.6853, 0.4595,
         0.4952],
        [0.6168, 0.5306, 0.4821, 0.3738, 0.3837, 0.5236, 0.3697, 0.6146, 0.4846,
         0.5302],
        [0.5419, 0.4984, 0.4322, 0.4123, 0.3585, 0.6306, 0.4616, 0.6247, 0.5138,
         0.4336],
        [0.4913, 0.4944, 0.4982, 0.4837, 0.3673, 0.6352, 0.5004, 0.6788, 0.5325,
         0.3913],
        [0.4665, 0.5581, 0.4097, 0.5095, 0.3969, 0.6068, 0.4689, 0.6683, 0.5457,
         0.4441],
        [0.4381, 0.4888, 0.4825, 0.4801, 0.3286, 0.5909, 0.3976, 0.6637, 0.5744,
         0.5057],
        [0.5508, 0.5829, 0.5005, 0.3588, 0.4447, 0.6700, 0.4953, 0.6005, 0.5357,
         0.4248],
        [0.5134, 0.5456, 0.4728, 0.4948, 0.4776, 0.6159, 0.4060, 0.6393, 0.4965,
         0.3381],
        [0.5282, 0.4946, 0.4317, 0.3757, 0.5779, 0.5974, 0.4281, 0.6671, 0.5394,
         0.5331]], device='c