# Pytorch for Boston Classification

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

from torch.utils.data import (
    Dataset, DataLoader
)

from sklearn.model_selection import train_test_split

from eleven.random import random_seeds
from eleven.data import create_boston_classification

In [2]:
# Some convenient formatting
torch.set_printoptions(precision=2, sci_mode=True)

In [3]:
device = torch.device('cpu')

In [4]:
x, y = create_boston_classification()

In [5]:
x = torch.tensor(x.values, dtype=torch.float32)
y = torch.tensor(y.values)

In [6]:
# Scikit-learn can handle any array like objects, including torch tensors
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

## Creating  a Pytorch Dataset

In [7]:
class Boston(Dataset):
    """Boston Classification
    
    Pytorch datasets require two methods:
    `__len__`, returns the number of samples 
    in the dataset, and `__getitem__`, which
    returns the next sample of data given by an 
    index `idx`.
    
    Args:
        x: features
        y: labels
    """
    
    def __init__(self, x, y):
        self.x = x
        self.y = y
        
    def __repr__(self):
        return f"Boston(x={self.x.shape}, y={self.y.shape})"
        
    def __len__(self):
        return len(self.x)
        
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [8]:
train_data = Boston(x_train, y_train)
valid_data = Boston(x_test, y_test)

In [9]:
print(f"Train Dataset: {train_data}")
print(f"Valid Dataset: {valid_data}")

Train Dataset: Boston(x=torch.Size([339, 13]), y=torch.Size([339]))
Valid Dataset: Boston(x=torch.Size([167, 13]), y=torch.Size([167]))


## A Linear Model, Multi-Class Logistic Regression

In [10]:
class LinearModel(nn.Module):
    
    def __init__(self, input_dim, num_classes):
        super(LinearModel, self).__init__()
        self.fc = nn.Linear(input_dim, num_classes)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, x):
        x = self.fc(x)
        x = self.softmax(x)
        return x

In [11]:
# Our features have 13 dimensions. Our model will 
# learn weigths for each of those features. 
model = LinearModel(13, 3)

## DataLoaders

In [12]:
train_loader = DataLoader(train_data, batch_size=1)
valid_loader = DataLoader(valid_data, batch_size=1)

### DataLoader Usage:

We iterate over the dataloader, getting batches of size `batch_size`, and hand that to our model, getting softmax scores of size `(batch_size, num_classes)`.

In [13]:
for idx, (data, target) in enumerate(train_loader):
    softmax_scores = model(data)
    print(f"Model Output Shape: {softmax_scores.shape}")
    if idx == 0:
        break

Model Output Shape: torch.Size([1, 3])


### Repeatable Minibatches

If you would like have reproducible minibatches, you can use the `random_seeds` context manager I added in `eleven.random`:

In [14]:
with random_seeds():
    for idx, (data, target) in enumerate(train_loader):
        print(f"Softmax Scores: {model(data)}")
        if idx == 4:
            break

Softmax Scores: tensor([[0.00e+00, -1.29e+02, -1.55e+02]], grad_fn=<LogSoftmaxBackward>)
Softmax Scores: tensor([[0.00e+00, -1.42e+02, -7.96e+01]], grad_fn=<LogSoftmaxBackward>)
Softmax Scores: tensor([[0.00e+00, -1.45e+02, -7.59e+01]], grad_fn=<LogSoftmaxBackward>)
Softmax Scores: tensor([[0.00e+00, -1.50e+02, -1.72e+02]], grad_fn=<LogSoftmaxBackward>)
Softmax Scores: tensor([[0.00e+00, -1.47e+02, -9.81e+01]], grad_fn=<LogSoftmaxBackward>)


## Training Loop

In [15]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [16]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [25]:
train_data = Boston(x_train, y_train)
valid_data = Boston(x_test, y_test)

train_loader = DataLoader(train_data, batch_size=32)
valid_loader = DataLoader(valid_data, batch_size=1)

linear_model = LinearModel(13, 3)
optimizer = optim.Adam(linear_model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)


with random_seeds():
    for epoch in range(500):
        train(linear_model, device, train_loader, optimizer, epoch)
        test(linear_model, device, valid_loader)

Test set: Average loss: 94.0044, Accuracy: 37/167 (22%)

Test set: Average loss: 79.3981, Accuracy: 37/167 (22%)

Test set: Average loss: 64.7922, Accuracy: 37/167 (22%)

Test set: Average loss: 50.2138, Accuracy: 36/167 (22%)

Test set: Average loss: 36.7250, Accuracy: 30/167 (18%)

Test set: Average loss: 26.0960, Accuracy: 39/167 (23%)

Test set: Average loss: 21.4169, Accuracy: 44/167 (26%)

Test set: Average loss: 19.9212, Accuracy: 38/167 (23%)

Test set: Average loss: 18.5720, Accuracy: 35/167 (21%)

Test set: Average loss: 17.3575, Accuracy: 26/167 (16%)

Test set: Average loss: 15.9125, Accuracy: 25/167 (15%)

Test set: Average loss: 14.3047, Accuracy: 27/167 (16%)

Test set: Average loss: 12.7598, Accuracy: 29/167 (17%)

Test set: Average loss: 11.3224, Accuracy: 26/167 (16%)

Test set: Average loss: 9.9937, Accuracy: 25/167 (15%)

Test set: Average loss: 8.7329, Accuracy: 26/167 (16%)

Test set: Average loss: 7.5374, Accuracy: 32/167 (19%)

Test set: Average loss: 6.4825, Ac

Test set: Average loss: 0.7301, Accuracy: 111/167 (66%)

Test set: Average loss: 0.7256, Accuracy: 111/167 (66%)

Test set: Average loss: 0.7212, Accuracy: 111/167 (66%)

Test set: Average loss: 0.7171, Accuracy: 112/167 (67%)

Test set: Average loss: 0.7131, Accuracy: 113/167 (68%)

Test set: Average loss: 0.7093, Accuracy: 113/167 (68%)

Test set: Average loss: 0.7057, Accuracy: 114/167 (68%)

Test set: Average loss: 0.7022, Accuracy: 114/167 (68%)

Test set: Average loss: 0.6989, Accuracy: 114/167 (68%)

Test set: Average loss: 0.6957, Accuracy: 115/167 (69%)

Test set: Average loss: 0.6926, Accuracy: 117/167 (70%)

Test set: Average loss: 0.6897, Accuracy: 117/167 (70%)

Test set: Average loss: 0.6868, Accuracy: 117/167 (70%)

Test set: Average loss: 0.6841, Accuracy: 117/167 (70%)

Test set: Average loss: 0.6816, Accuracy: 117/167 (70%)

Test set: Average loss: 0.6791, Accuracy: 116/167 (69%)

Test set: Average loss: 0.6767, Accuracy: 116/167 (69%)

Test set: Average loss: 0.6744,

Test set: Average loss: 0.6202, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6200, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6198, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6196, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6194, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6192, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6190, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6189, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6187, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6185, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6184, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6182, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6181, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6180, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6178, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6177, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6176, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6175,

Test set: Average loss: 0.6154, Accuracy: 129/167 (77%)

Test set: Average loss: 0.6154, Accuracy: 129/167 (77%)

Test set: Average loss: 0.6154, Accuracy: 129/167 (77%)

Test set: Average loss: 0.6154, Accuracy: 129/167 (77%)

Test set: Average loss: 0.6154, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6154, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6154, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6155, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6155, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6155, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6155, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6155, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6155, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6155, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6155, Accuracy: 127/167 (76%)

Test set: Average loss: 0.6155, Accuracy: 127/167 (76%)

Test set: Average loss: 0.6156, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6156,

Test set: Average loss: 0.6163, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6163, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6163, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6164, Accuracy: 129/167 (77%)

Test set: Average loss: 0.6164,

Test set: Average loss: 0.6162, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6162, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6162, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6162, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6162, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6162, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6162, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6162, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6162, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6162, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6161, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6161, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6161, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6161, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6161, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6161, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6161, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6161,

In [20]:
class MLP(nn.Module):
    
    def __init__(self, input_dim, hid_dim, num_classes):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hid_dim)
        self.fc2 = nn.Linear(hid_dim, num_classes)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return self.softmax(x)

In [29]:
mlp = MLP(13, 10, 3)
optimizer = optim.Adam(mlp.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)


with random_seeds():
    for epoch in range(500):
        train(mlp, device, train_loader, optimizer, epoch)
        test(mlp, device, valid_loader)

Test set: Average loss: 34.0999, Accuracy: 64/167 (38%)

Test set: Average loss: 21.4798, Accuracy: 57/167 (34%)

Test set: Average loss: 16.1400, Accuracy: 55/167 (33%)

Test set: Average loss: 11.1404, Accuracy: 58/167 (35%)

Test set: Average loss: 5.7628, Accuracy: 53/167 (32%)

Test set: Average loss: 4.2681, Accuracy: 35/167 (21%)

Test set: Average loss: 2.7534, Accuracy: 66/167 (40%)

Test set: Average loss: 1.8204, Accuracy: 78/167 (47%)

Test set: Average loss: 1.4676, Accuracy: 86/167 (51%)

Test set: Average loss: 1.1819, Accuracy: 86/167 (51%)

Test set: Average loss: 1.1406, Accuracy: 90/167 (54%)

Test set: Average loss: 1.0892, Accuracy: 89/167 (53%)

Test set: Average loss: 1.0690, Accuracy: 92/167 (55%)

Test set: Average loss: 1.0407, Accuracy: 92/167 (55%)

Test set: Average loss: 1.0106, Accuracy: 94/167 (56%)

Test set: Average loss: 0.9879, Accuracy: 96/167 (57%)

Test set: Average loss: 0.9631, Accuracy: 96/167 (57%)

Test set: Average loss: 0.9423, Accuracy: 98

Test set: Average loss: 0.6664, Accuracy: 119/167 (71%)

Test set: Average loss: 0.6660, Accuracy: 120/167 (72%)

Test set: Average loss: 0.6656, Accuracy: 121/167 (72%)

Test set: Average loss: 0.6652, Accuracy: 122/167 (73%)

Test set: Average loss: 0.6648, Accuracy: 121/167 (72%)

Test set: Average loss: 0.6645, Accuracy: 121/167 (72%)

Test set: Average loss: 0.6641, Accuracy: 121/167 (72%)

Test set: Average loss: 0.6638, Accuracy: 121/167 (72%)

Test set: Average loss: 0.6635, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6631, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6628, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6625, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6621, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6618, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6616, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6613, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6610, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6607,

Test set: Average loss: 0.6498, Accuracy: 124/167 (74%)

Test set: Average loss: 0.6496, Accuracy: 124/167 (74%)

Test set: Average loss: 0.6495, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6494, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6492, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6491, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6490, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6489, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6488, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6487, Accuracy: 123/167 (74%)

Test set: Average loss: 0.6485, Accuracy: 124/167 (74%)

Test set: Average loss: 0.6484, Accuracy: 124/167 (74%)

Test set: Average loss: 0.6484, Accuracy: 124/167 (74%)

Test set: Average loss: 0.6483, Accuracy: 124/167 (74%)

Test set: Average loss: 0.6481, Accuracy: 124/167 (74%)

Test set: Average loss: 0.6480, Accuracy: 124/167 (74%)

Test set: Average loss: 0.6478, Accuracy: 124/167 (74%)

Test set: Average loss: 0.6478,

Test set: Average loss: 0.6466, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6465, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6464, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6463, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6462, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6461, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6460, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6460, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6459, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6458, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6458, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6457, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6456, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6455, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6454, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6453, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6454, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6453,

Test set: Average loss: 0.6403, Accuracy: 129/167 (77%)

Test set: Average loss: 0.6402, Accuracy: 129/167 (77%)

Test set: Average loss: 0.6401, Accuracy: 129/167 (77%)

Test set: Average loss: 0.6401, Accuracy: 129/167 (77%)

Test set: Average loss: 0.6401, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6400, Accuracy: 129/167 (77%)

Test set: Average loss: 0.6399, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6398, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6397, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6396, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6395, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6395, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6395, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6393, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6392, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6392, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6391, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6390,

Test set: Average loss: 0.6353, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6353, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6352, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6352, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6351, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6351, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6350, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6350, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6349, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6348, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6347, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6347, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6347, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6346, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6345, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6345, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6344, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6344,