# Pytorch for Boston Classification

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

from torch.utils.data import (
    Dataset, DataLoader
)

from sklearn.model_selection import train_test_split

from eleven.random import random_seeds
from eleven.data import create_boston_classification

In [2]:
# Some convenient formatting
torch.set_printoptions(precision=2, sci_mode=True)

In [3]:
device = torch.device('cpu')

In [4]:
x, y = create_boston_classification()

In [5]:
x = torch.tensor(x.values, dtype=torch.float32)
y = torch.tensor(y.values)

In [6]:
# Scikit-learn can handle any array like objects, including torch tensors
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

## Creating  a Pytorch Dataset

In [7]:
class Boston(Dataset):
    """Boston Classification
    
    Pytorch datasets require two methods:
    `__len__`, returns the number of samples 
    in the dataset, and `__getitem__`, which
    returns the next sample of data given by an 
    index `idx`.
    
    Args:
        x: features
        y: labels
    """
    
    def __init__(self, x, y):
        self.x = x
        self.y = y
        
    def __repr__(self):
        return f"Boston(x={self.x.shape}, y={self.y.shape})"
        
    def __len__(self):
        return len(self.x)
        
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [8]:
train_data = Boston(x_train, y_train)
valid_data = Boston(x_test, y_test)

In [9]:
print(f"Train Dataset: {train_data}")
print(f"Valid Dataset: {valid_data}")

Train Dataset: Boston(x=torch.Size([339, 13]), y=torch.Size([339]))
Valid Dataset: Boston(x=torch.Size([167, 13]), y=torch.Size([167]))


## A Linear Model, Multi-Class Logistic Regression

In [10]:
class LinearModel(nn.Module):
    
    def __init__(self, input_dim, num_classes):
        super(LinearModel, self).__init__()
        self.fc = nn.Linear(input_dim, num_classes)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, x):
        x = self.fc(x)
        x = self.softmax(x)
        return x

In [11]:
# Our features have 13 dimensions. Our model will 
# learn weigths for each of those features. 
model = LinearModel(13, 3)

## DataLoaders

In [12]:
train_loader = DataLoader(train_data, batch_size=1)
valid_loader = DataLoader(valid_data, batch_size=1)

### DataLoader Usage:

We iterate over the dataloader, getting batches of size `batch_size`, and hand that to our model, getting softmax scores of size `(batch_size, num_classes)`.

In [13]:
for idx, (data, target) in enumerate(train_loader):
    softmax_scores = model(data)
    print(f"Model Output Shape: {softmax_scores.shape}")
    if idx == 0:
        break

Model Output Shape: torch.Size([1, 3])


### Repeatable Minibatches

If you would like have reproducible minibatches, you can use the `random_seeds` context manager I added in `eleven.random`:

In [14]:
with random_seeds():
    for idx, (data, target) in enumerate(train_loader):
        print(f"Softmax Scores: {model(data)}")
        if idx == 4:
            break

Softmax Scores: tensor([[-2.79e+02, 0.00e+00, -2.28e+02]], grad_fn=<LogSoftmaxBackward>)
Softmax Scores: tensor([[-1.62e+02, 0.00e+00, -1.84e+02]], grad_fn=<LogSoftmaxBackward>)
Softmax Scores: tensor([[-1.73e+02, 0.00e+00, -1.94e+02]], grad_fn=<LogSoftmaxBackward>)
Softmax Scores: tensor([[-2.95e+02, 0.00e+00, -2.48e+02]], grad_fn=<LogSoftmaxBackward>)
Softmax Scores: tensor([[-1.93e+02, 0.00e+00, -2.02e+02]], grad_fn=<LogSoftmaxBackward>)


## Training Loop

In [15]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [16]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [30]:
train_data = Boston(x_train, y_train)
valid_data = Boston(x_test, y_test)

train_loader = DataLoader(train_data, batch_size=32)
valid_loader = DataLoader(valid_data, batch_size=1)

model = LinearModel(13, 3)
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)


with random_seeds():
    for epoch in range(500):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, valid_loader)

Test set: Average loss: 25.6150, Accuracy: 92/167 (55%)

Test set: Average loss: 22.1929, Accuracy: 83/167 (50%)

Test set: Average loss: 18.5960, Accuracy: 76/167 (46%)

Test set: Average loss: 14.2135, Accuracy: 80/167 (48%)

Test set: Average loss: 9.8361, Accuracy: 83/167 (50%)

Test set: Average loss: 6.0394, Accuracy: 90/167 (54%)

Test set: Average loss: 4.7998, Accuracy: 94/167 (56%)

Test set: Average loss: 4.0221, Accuracy: 100/167 (60%)

Test set: Average loss: 3.8785, Accuracy: 99/167 (59%)

Test set: Average loss: 3.7933, Accuracy: 100/167 (60%)

Test set: Average loss: 3.5164, Accuracy: 102/167 (61%)

Test set: Average loss: 3.3269, Accuracy: 102/167 (61%)

Test set: Average loss: 3.1796, Accuracy: 100/167 (60%)

Test set: Average loss: 2.9953, Accuracy: 101/167 (60%)

Test set: Average loss: 2.8240, Accuracy: 102/167 (61%)

Test set: Average loss: 2.6488, Accuracy: 102/167 (61%)

Test set: Average loss: 2.4829, Accuracy: 102/167 (61%)

Test set: Average loss: 2.3197, Acc

Test set: Average loss: 0.7716, Accuracy: 122/167 (73%)

Test set: Average loss: 0.7682, Accuracy: 122/167 (73%)

Test set: Average loss: 0.7649, Accuracy: 122/167 (73%)

Test set: Average loss: 0.7617, Accuracy: 123/167 (74%)

Test set: Average loss: 0.7584, Accuracy: 123/167 (74%)

Test set: Average loss: 0.7553, Accuracy: 123/167 (74%)

Test set: Average loss: 0.7522, Accuracy: 124/167 (74%)

Test set: Average loss: 0.7491, Accuracy: 124/167 (74%)

Test set: Average loss: 0.7461, Accuracy: 124/167 (74%)

Test set: Average loss: 0.7431, Accuracy: 124/167 (74%)

Test set: Average loss: 0.7402, Accuracy: 124/167 (74%)

Test set: Average loss: 0.7374, Accuracy: 125/167 (75%)

Test set: Average loss: 0.7345, Accuracy: 125/167 (75%)

Test set: Average loss: 0.7318, Accuracy: 125/167 (75%)

Test set: Average loss: 0.7290, Accuracy: 125/167 (75%)

Test set: Average loss: 0.7263, Accuracy: 125/167 (75%)

Test set: Average loss: 0.7237, Accuracy: 125/167 (75%)

Test set: Average loss: 0.7211,

Test set: Average loss: 0.6259, Accuracy: 127/167 (76%)

Test set: Average loss: 0.6254, Accuracy: 127/167 (76%)

Test set: Average loss: 0.6249, Accuracy: 127/167 (76%)

Test set: Average loss: 0.6244, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6240, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6235, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6231, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6227, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6223, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6219, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6216, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6212, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6208, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6205, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6202, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6199, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6195, Accuracy: 128/167 (77%)

Test set: Average loss: 0.6192,

Test set: Average loss: 0.6116, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6115, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6115, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6115, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6115, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6115, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6114, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6114, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6114, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6114, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6114, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6114, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6114, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6113, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6113, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6113, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6113,

Test set: Average loss: 0.6115, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6115, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6115, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6115, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6115, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 130/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6116, Accuracy: 131/167 (78%)

Test set: Average loss: 0.6116,

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118, Accuracy: 133/167 (80%)

Test set: Average loss: 0.6118,