In [1]:
import torch
import torch.nn as nn
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
import numpy as np
import matplotlib.pyplot as plt

## Steps
0) Collect data
1) Model
2) Loss
3) Optimizer
4) Training loop

### 0. Collect data

In [2]:
bc = datasets.load_breast_cancer()
X_np, y_np = bc.data, bc.target

# Convert to tensor
X = torch.from_numpy(X_np.astype(np.float32))
y = torch.from_numpy(y_np.astype(np.float32))

# # Reshaping for outputs into rank 2 tensor
y = y.reshape(y_np.shape[0],1)


In [3]:
n_samples, n_features = X.shape

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=20)

In [5]:
scaler = StandardScaler() # 0 mean and variance = 1 = std_dev
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))

### 1. Model

In [6]:
class LogisticRegression(nn.Module):
    def __init__(self, n_features):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(in_features = n_features, out_features = 1)
        self.optim = None
        self.loss_obj = None
    
    def forward(self,x):
        return(torch.sigmoid(self.linear(x)))               # Or create an object from nn.Sigmoid and use that instead of torch.sigmoid()

    def loss_func(self, loss_func = nn.BCELoss):
        self.loss_obj = loss_func()                         # Object of the loss function in torch.nn 

    def optimizer(self, optimizer, learning_rate = 0.01):
        self.optim = optimizer(self.linear.parameters(),lr = learning_rate)

    
    def fit(self, X, y, epochs = 100, print_statements = False):
        for epoch in range(epochs):
            y_pred = self.forward(X)                        # Forward prop
            loss = self.loss_obj(y_pred,y)                  # Compute loss
            loss.backward()                                 # Compute local gradients
            self.optim.step()                               # Update parameters
            self.optim.zero_grad()                          # Zero the gradients
            if(print_statements and epoch%(epochs//10)==0):
                print(f'Epoch {epoch+1}; Loss = {loss.item():0.4f}')

In [7]:
predict = LogisticRegression(n_features)

### 2,3. Loss and Optimizer

In [8]:
predict.loss_func(nn.BCELoss)
predict.optimizer(torch.optim.SGD, learning_rate=0.01)

### 4. Training loop

In [9]:
predict.fit(X_train,y_train, epochs = 1500)

### 5. Check Predictions

In [10]:
y_predicted = predict.forward(X_train).detach().numpy()

In [11]:
print('Training data metrics -\nConfusion Matrix')
print(confusion_matrix(np.round(y_predicted), y_train.detach().numpy()))
print(f'Accuracy = {100*accuracy_score(np.round(y_predicted), y_train.detach().numpy()):.2f}%')

Training data metrics -
Confusion Matrix
[[157   3]
 [  7 288]]
Accuracy = 97.80%


In [12]:
y_predicted = predict.forward(X_test).detach().numpy()

In [13]:
print('Testing data metrics -\nConfusion Matrix')
print(confusion_matrix(np.round(y_predicted), y_test.detach().numpy()))
print(f'Accuracy = {100*accuracy_score(np.round(y_predicted), y_test.detach().numpy()):.2f}%')

Testing data metrics -
Confusion Matrix
[[47  0]
 [ 1 66]]
Accuracy = 99.12%


In [14]:
torch.save(predict,'models/logistic_regressor.pt')