In [1]:
from fastai.vision.all import *

In [2]:
path = untar_data(URLs.MNIST_SAMPLE)

**Direcciones

In [3]:
train3_folder = (path/'train'/'3').ls() # dirección de las imágenes
train7_folder = (path/'train'/'7').ls()
valid3_folder = (path/'valid'/'3').ls()
valid7_folder = (path/'valid'/'7').ls()

**Creación de los tensores

In [4]:
def stack(folder):
    x = torch.stack([tensor(Image.open(o)) for o in (folder)]).float()/255 # crea una lista con las imágenes (puedo abrir cualquiera con show_image(train_7[])). Divie por 255 para que los valores estén entre 0 y 1
    return x

In [5]:
train_7 = stack(train7_folder) # tensor de rango-3 (6265 matrices de 28x28, siendo cada matriz una imagen)
train_3 = stack(train3_folder) # train_3.shape -> torch.Size([6265, 28, 28]) Se tienen así dos sets de entrenamiento, uno para el 3 y otro para el 7
valid_3 = stack(valid3_folder) # valid_3.shape -> torch.Size([1010, 28, 28])
valid_7 = stack(valid7_folder) # valid_7.shape -> torch.Size([1028, 28, 28]) Se tienen así dos sets de validación, uno para el 3 y otro para el 7

In [6]:
train_x = torch.cat([train_3, train_7]).view(-1, 28*28) # concatena los tensores tridimensionales de entrenamiento, y los convierte en una lista de vectores. Las matrices de 28x28 ahora son vectores de largo 28x28=784
train_y = tensor([1]*len(train3_folder) + [0]*len(train7_folder)).unsqueeze(1) # crea un vector con tantos 1 como elementos hay en la train3_folder, y tantos 0 como elementos hay en train7_folder

valid_x = torch.cat([valid_3, valid_7]).view(-1, 28*28) # el set de validación se muestra también como una colección de vectores
valid_y = tensor([1]*len(valid3_folder) + [0]*len(valid7_folder)).unsqueeze(1) # básicamente, el vector de labels para valid_x

**Creación de los datasets

In [7]:
dset = list(zip(train_x, train_y)) # genera un arreglo de tuplas (train_x,train_y) --es un arreglo donde cada vector tiene su correspondiente etiqueta--
dset_valid = list(zip(valid_x, valid_y)) # idem para el set de validación

In [8]:
class BasicOpt: # Mediante el atributo Self, se puede acceder a los atributos y métodos de la clase en Python.
    def __init__(self, params, lr):
        self.params, self.lr = list(params), lr
        
    def step(self):
        for p in self.params:
            p.data -= p.grad.data * self.lr
            
    def zero_grad(self):
        for p in self.params:
            p.grad = None
    

In [9]:
linear1 = nn.Linear(28*28,1)

In [10]:
def mnist_loss(preds, target):
    preds = preds.sigmoid()
    return torch.where(target ==1, 1-preds, preds).mean()

In [11]:
def mse(preds, target):
    preds= preds.sigmoid()
    return ((preds-target)**2).mean()

In [12]:
dl = DataLoader(dset, batch_size=56) # crea lotes de elementos, para procesar en paralelo
dl_valid = DataLoader(dset_valid, batch_size=56) # idem, con el set de validación

In [13]:
def calc_grad(x, y, model):
    preds = linear1(x)
    loss = mnist_loss(preds, y) # calcula el loss para las predicciones respecto a sus labels
    loss.backward()

In [14]:
def metric(pred, target):
    return (pred.sigmoid().round().unsqueeze(1) == target).float().mean() # consultar bien esta línea

# pred es la predicción hecha con linear1
# sigmoid() limita los valores entre 0 y 1
# round() los redondea a enteros
# unsqueeze(1) le da la misma forma que target, para poder comparar
# la comparación da en valores booleanos
# .float() los convierte en números
# .mean() saca el promedio

Para usar de ejemplo lo anterior, se puede probar:<br>
batch_x = list(dl)[43][0]<br>
batch_y = list(dl)[43][1]<br>
pred = linear1(batch_x)<br>
pred<br>
pred.sigmoid()<br>
pred.sigmoid().round()<br>
(pred.sigmoid().round().unsqueeze(1)<br>
(pred.sigmoid().round().unsqueeze(1) == batch_y)<br>
(pred.sigmoid().round().unsqueeze(1) == batch_y).float()<br>
(pred.sigmoid().round().unsqueeze(1) == batch_y).float().mean()<br>

In [17]:
lr = 1
optimizer = BasicOpt(linear1.parameters(),lr)

def train_epoch(model):
    for x, y in dl: # 'x' sería un lote de entrenamiento, 'y' sus labels
        calc_grad(x, y, linear1)
        optimizer.step()
        optimizer.zero_grad()
        

In [18]:
def validate_epoch(model):
    accurates = [metric(linear1(x), y) for x,y in dl_valid] # cuenta los aciertos para cada lote
    return round(torch.stack(accurates).mean().item(), 4) # hace un vector con los valores, y calcular el promedio. El 4 es la cantidad de decimales

In [19]:
def train_model(model, epochs):
    for i in range(epochs):
        train_epoch(model)
        print(validate_epoch(model), end='\n')

In [20]:
train_model(linear1,20)

0.6106
0.7182
0.8215
0.8712
0.8958
0.9103
0.9219
0.9301
0.9407
0.9436
0.9499
0.9533
0.9552
0.9586
0.96
0.9619
0.9629
0.9634
0.9658
0.9673


CONSULTAR BIEN CÓMO SE INICIALIZAN LOS PARÁMETROS, SI NUNCA LLAMÉ A OPT_INIT. ES NECESARIO INICIALIZAR MANUALMENTE UN LEARNING RATE, PERO LOS DEMÁS?

In [21]:
opt = SGD(linear1.parameters(),lr) # SGD hace lo mismo que Optimizer, pero viene con fastai

In [22]:
dls = DataLoaders(dl, dl_valid) # dataloaders

In [33]:
learn = Learner(dls, linear1, opt_func=SGD, loss_func=mnist_loss, metrics=metric)

In [35]:
learn.fit(50, lr=5)

epoch,train_loss,valid_loss,metric,time
0,0.001393,0.046402,0.953946,00:00
1,0.001384,0.045961,0.953946,00:00
2,0.001374,0.045611,0.954928,00:00
3,0.001362,0.045346,0.954928,00:01
4,0.001349,0.04514,0.954928,00:01
5,0.001339,0.044955,0.954928,00:00
6,0.001333,0.044777,0.957872,00:01
7,0.001332,0.044581,0.957872,00:00
8,0.001331,0.044294,0.957381,00:01
9,0.001327,0.043876,0.957872,00:00


In [36]:
def init_params(size, std=1.0):
    return (torch.randn(size)*std).requires_grad_()

In [37]:
def simple_net1(x):
    res = x@w1 + b1
    res = res.max(tensor(0.0))
    res = x@w2 + b2
    return res

In [39]:
w1 = init_params(28*28,54)
b1 = init_params(54)
w2 = init_params((54,1))
b2 = init_params(1)

In [46]:
simple_net2 = nn.Sequential(
    nn.Linear(28*28,54),
    nn.ReLU(),
    nn.Linear(54,1)
)

In [49]:
learn = Learner(dls, simple_net1, opt_func=SGD, loss_func=mnist_loss, metrics=metric)
#learn.fit(40)

learn = Learner(dls, simple_net2, opt_func=SGD, loss_func=mnist_loss, metrics=metric)
learn.fit(40,0.1)

epoch,train_loss,valid_loss,metric,time
0,0.004306,0.019461,0.979462,00:01
1,0.004281,0.019252,0.980934,00:01
2,0.004257,0.019059,0.980934,00:01
3,0.004232,0.01888,0.981424,00:01
4,0.004206,0.018719,0.981424,00:01
5,0.00418,0.018576,0.981424,00:01
6,0.004154,0.018447,0.981424,00:01
7,0.004127,0.018332,0.981424,00:00
8,0.004099,0.018229,0.980934,00:01
9,0.004071,0.018135,0.980934,00:01
