In [1]:
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, precision=2)

In [2]:
import csv
wine_path = "../data/tabular-wine/winequality-white.csv"
wineq_numpy = np.loadtxt(wine_path, dtype=np.float32, delimiter=";", skiprows=1)
wineq_numpy

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32)

In [3]:
col_list = next(csv.reader(open(wine_path), delimiter=';'))

wineq_numpy.shape, col_list

((4898, 12),
 ['fixed acidity',
  'volatile acidity',
  'citric acid',
  'residual sugar',
  'chlorides',
  'free sulfur dioxide',
  'total sulfur dioxide',
  'density',
  'pH',
  'sulphates',
  'alcohol',
  'quality'])

In [4]:
wineq = torch.from_numpy(wineq_numpy)

wineq.shape, wineq.type()

(torch.Size([4898, 12]), 'torch.FloatTensor')

In [5]:
data = wineq[:, :-1] # <1>
data, data.shape

(tensor([[ 7.00,  0.27,  ...,  0.45,  8.80],
         [ 6.30,  0.30,  ...,  0.49,  9.50],
         ...,
         [ 5.50,  0.29,  ...,  0.38, 12.80],
         [ 6.00,  0.21,  ...,  0.32, 11.80]]), torch.Size([4898, 11]))

In [6]:
target = wineq[:, -1] # <2>
target, target.shape

(tensor([6., 6.,  ..., 7., 6.]), torch.Size([4898]))

In [7]:
target = wineq[:, -1].long()
target

tensor([6, 6,  ..., 7, 6])

In [8]:
data_mean = torch.mean(data, dim=0)
data_mean

tensor([6.85e+00, 2.78e-01, 3.34e-01, 6.39e+00, 4.58e-02, 3.53e+01, 1.38e+02,
        9.94e-01, 3.19e+00, 4.90e-01, 1.05e+01])

In [9]:
data_var = torch.var(data, dim=0)
data_var

tensor([7.12e-01, 1.02e-02, 1.46e-02, 2.57e+01, 4.77e-04, 2.89e+02, 1.81e+03,
        8.95e-06, 2.28e-02, 1.30e-02, 1.51e+00])

In [10]:
data_normalized = (data - data_mean) / torch.sqrt(data_var)
data_normalized

tensor([[ 1.72e-01, -8.18e-02,  ..., -3.49e-01, -1.39e+00],
        [-6.57e-01,  2.16e-01,  ...,  1.35e-03, -8.24e-01],
        ...,
        [-1.61e+00,  1.17e-01,  ..., -9.63e-01,  1.86e+00],
        [-1.01e+00, -6.77e-01,  ..., -1.49e+00,  1.04e+00]])

In [11]:
data_normalized.size()

torch.Size([4898, 11])

In [12]:
np.unique(target.numpy())

array([3, 4, 5, 6, 7, 8, 9])

In [13]:
np.unique(target.numpy() - 3)

array([0, 1, 2, 3, 4, 5, 6])

In [14]:
target_shift = target - 3

In [15]:
np.unique(target_shift.numpy())

array([0, 1, 2, 3, 4, 5, 6])

In [16]:
n_samples = data_normalized.shape[0]
n_val = int(0.3 * n_samples)

shuffled_indices = torch.randperm(n_samples)

train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_x = data_normalized[train_indices]
train_y = target_shift[train_indices]

val_x = data_normalized[val_indices]
val_y = target_shift[val_indices]

In [17]:
train_x.size(), train_y.size()

(torch.Size([3429, 11]), torch.Size([3429]))

In [18]:
def training_loop(model, n_epochs, optimizer, loss_fn, train_x, val_x, train_y, val_y):
    for epoch in range(1, n_epochs + 1):
        
        train_t_p = model(train_x) # ya no tenemos que pasar los params
        train_loss = loss_fn(train_t_p, train_y)
        
        with torch.no_grad(): # todos los args requires_grad=False
            val_t_p = model(val_x)
            val_loss = loss_fn(val_t_p, val_y)
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        if epoch == 1 or epoch % 1000 == 0:
            print(f"Epoch {epoch}, Training loss {train_loss}, Validation loss {val_loss}")

In [19]:
import torch.nn as nn
import torch.optim as optim

In [20]:
class SimpleNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(11, 32)
        self.fc2 = nn.Linear(32, 7)

        
    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = torch.softmax(x, dim=1)
        return x


net = SimpleNet()
optimizer = optim.SGD(net.parameters(), lr=5e-3, momentum=0.9)
criterion = nn.CrossEntropyLoss()

In [21]:
%%time
training_loop(
    n_epochs=1000,
    optimizer=optimizer,
    model=net,
    loss_fn=criterion,
    train_x = train_x,
    val_x = val_x,
    train_y = train_y,
    val_y = val_y)

Epoch 1, Training loss 1.9508408308029175, Validation loss 1.9508657455444336
Epoch 1000, Training loss 1.6405816078186035, Validation loss 1.6385960578918457
CPU times: user 13.5 s, sys: 8.3 s, total: 21.8 s
Wall time: 2.01 s
