In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# 0 data preprocessing

# Load the breast-cancer dataset and list the fields it exposes.
bc = datasets.load_breast_cancer()
print(bc.keys())

# Feature matrix and target vector used by every cell below.
X = bc.data
y = bc.target

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])


In [3]:
# X is 2-D: the first axis counts samples, the second counts features.
(n_samples, n_features) = X.shape
print(f'{n_samples} {n_features}')

569 30


In [4]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

In [5]:
# Standardize the features. The scaler is fit on the training split only and
# the same fitted transform is then applied to the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Convert all four numpy arrays to float32 torch tensors in one pass.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(arr.astype(np.float32))
    for arr in (X_train, X_test, y_train, y_test)
)

# Reshape the label vectors into (n, 1) columns.
y_train, y_test = y_train.view(-1, 1), y_test.view(-1, 1)

In [10]:
# Preview the first five rows of the training feature tensor.
X_train[:5]

tensor([[-0.3618, -0.2652, -0.3172, -0.4671,  1.8038,  1.1817, -0.5169,  0.1065,
         -0.3901,  1.3914,  0.1437, -0.1208,  0.1601, -0.1326, -0.5863, -0.1248,
         -0.5787,  0.1091, -0.2819, -0.1889, -0.2571, -0.2403, -0.2442, -0.3669,
          0.5449,  0.2481, -0.7109, -0.0797, -0.5280,  0.2506],
        [-0.8633,  0.7156, -0.8565, -0.7967, -0.0586, -0.4285, -0.5170, -0.6814,
          0.7948,  0.3882, -0.4545,  0.4009, -0.4357, -0.5216, -1.1631,  0.2724,
          0.0675, -0.2392,  1.1130,  0.3502, -0.8894,  0.3847, -0.8880, -0.7897,
         -1.0429, -0.4824, -0.5631, -0.7698,  0.4431, -0.2099],
        [-0.4334,  0.3251, -0.4129, -0.5036,  0.2029,  0.3169,  0.2114,  0.2923,
         -0.2941,  1.1295, -0.2249,  0.9890, -0.0743, -0.4596,  1.8909,  0.8176,
          0.5919,  1.7726,  0.1356,  0.7924, -0.6160, -0.0636, -0.5528, -0.6284,
         -0.1823, -0.1924, -0.2601, -0.0660, -1.1169,  0.0329],
        [-0.4191,  1.0410, -0.3904, -0.4502,  1.1198,  0.4183,  0.2901,  0.5127

In [11]:
# Preview the first five training labels (one label per row after view(-1, 1)).
y_train[:5]

tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.]])

In [6]:
# 1 model
# f = wx + b, sigmoid at the end

class LogisticRegression(nn.Module):
    """Binary logistic regression: one linear layer followed by a sigmoid.

    Args:
        n_input_features: number of input features per sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # Single output unit: one score per sample.
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return sigmoid(linear(x)), i.e. one value in (0, 1) per input row."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

# Seed PyTorch's RNG right before building the model: nn.Linear draws its
# initial weights at random, so without a seed the training losses and the
# final accuracy change on every Restart & Run All. 1234 mirrors the
# random_state already passed to train_test_split.
torch.manual_seed(1234)
model = LogisticRegression(n_features)

In [7]:
# 2 loss and optimizer

# Learning rate used by the optimizer below.
lr = 0.01

# Binary cross-entropy on probabilities; the model's forward() already applies
# the sigmoid, so BCELoss receives values in (0, 1).
criterion = nn.BCELoss()

# Stochastic gradient descent over every parameter of the model.
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)

In [8]:
# 3 training loop
num_epochs = 100

for epoch in range(num_epochs):
    # Forward pass over the whole training set, then the loss against the
    # training labels.
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # Backpropagate, take one optimizer step, then clear the gradients so the
    # next iteration does not accumulate on top of them.
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Report every 10th epoch (epoch is 0-based, so that is 9, 19, 29, ...).
    if epoch % 10 == 9:
        print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')

epoch: 10, loss = 0.7317
epoch: 20, loss = 0.5813
epoch: 30, loss = 0.4892
epoch: 40, loss = 0.4275
epoch: 50, loss = 0.3832
epoch: 60, loss = 0.3499
epoch: 70, loss = 0.3236
epoch: 80, loss = 0.3024
epoch: 90, loss = 0.2847
epoch: 100, loss = 0.2698


In [9]:
# Evaluate on the held-out split; no gradients are needed for inference.
with torch.no_grad():
    y_pred = model(X_test)
    print(y_pred[:5])

    # Rounding the sigmoid outputs yields the predicted 0./1. class labels.
    y_pred_cls = y_pred.round()
    print(y_pred_cls[:5])

    # Accuracy = matching predictions / number of test rows.
    n_correct = (y_pred_cls == y_test).sum()
    acc = n_correct / float(len(y_test))
    print(f'accuracy = {acc:.4f}')

tensor([[0.8022],
        [0.9077],
        [0.3040],
        [0.8795],
        [0.7495]])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [1.]])
accuracy = 0.8947
