# Logistic Regression

Based on **Patrick Loeber**'s video: https://www.youtube.com/watch?v=c36lUUr864M&t=5970s

## Typical PyTorch pipeline

1) Design model (input, output size, forward pass)
2) Construct loss and optimizer
3) Training loop
    + forward pass: compute prediction and loss
    + backward pass: gradients
    + update weights

In [9]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# 0) prepare data
# Load the breast-cancer dataset and pull out its feature matrix and targets.
bc = datasets.load_breast_cancer()
X = bc.data
y = bc.target

n_samples, n_features = X.shape
print(f"samples: {n_samples}, features: {n_features}")

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

# scale features
# The scaler is fitted on the training split only and then re-used on the
# test split, so no statistics from the test data leak into training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# convert to torch tensors (float32 is the default dtype of nn.Linear weights)
X_train = torch.from_numpy(X_train.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))

# targets: 1-D (n,) -> column vector (n, 1), matching the model's output shape
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)

samples: 569, features: 30


`sc` is a `StandardScaler`, which rescales our features to have zero mean and unit variance. This is always recommended when working with logistic regression.

In [16]:
# 1) model
# f = wx + b, sigmoid function at the end
class LogisticRegression(nn.Module):
    """Single linear layer followed by a sigmoid.

    Computes ``sigmoid(w * x + b)`` and therefore yields one value in
    the open interval (0, 1) per input row.
    """

    def __init__(self, n_input_features):
        """Create the linear layer mapping ``n_input_features`` inputs to 1 output."""
        super().__init__()
        self.linear = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer applied to ``x``."""
        return torch.sigmoid(self.linear(x))
    
# One input per dataset feature, one sigmoid output.
model = LogisticRegression(n_features)

# 2) loss and optimizer
# Binary cross entropy (BCELoss) on the sigmoid outputs, minimized with SGD.
learning_rate = 0.01
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# 3) training loop
# Every epoch runs the whole training set through the model in one batch.
num_epochs = 2000
log_every = 200  # report the loss once per this many epochs

for epoch in range(num_epochs):
    # forward pass: predictions, then the loss against the targets
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)

    # backward pass, parameter update, then clear the gradients so they
    # do not accumulate into the next epoch
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if (epoch + 1) % log_every != 0:
        continue
    print(f"epoch: {epoch+1}, loss = {loss.item():.4f}")

epoch: 200, loss = 0.1652
epoch: 400, loss = 0.1233
epoch: 600, loss = 0.1044
epoch: 800, loss = 0.0932
epoch: 1000, loss = 0.0857
epoch: 1200, loss = 0.0802
epoch: 1400, loss = 0.0760
epoch: 1600, loss = 0.0725
epoch: 1800, loss = 0.0697
epoch: 2000, loss = 0.0672


In [17]:
# Evaluation: gradient tracking is switched off because nothing is trained here.
with torch.no_grad():
    y_predicted = model(X_test)
    # Rounding the sigmoid output gives a hard 0/1 class label.
    y_predicted_cls = y_predicted.round()
    n_correct = (y_predicted_cls == y_test).sum()
    n_total = float(y_test.shape[0])
    # accuracy = fraction of test samples whose rounded prediction equals the target
    acc = n_correct / n_total
    print(f"accuracy = {acc:.4f}")

accuracy = 0.9649
