In [29]:
import torch
import torch.nn as nn
import sklearn as sk
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


In [30]:
bc_raw_data = datasets.load_breast_cancer()

X = bc_raw_data["data"]
y = bc_raw_data["target"]

In [31]:
RANDOM_SEED = 42

In [32]:
print(f"{type(X)=}")
print(f"{type(y)=}")

print(f"{len(X)=}")
print(f"{len(y)=}")
print(f"{X.shape=}")
print(f"{y.shape=}")

print(f"One X sample: {X[0]=}")
print(f"One Y sample: {y[0]=}")

type(X)=<class 'numpy.ndarray'>
type(y)=<class 'numpy.ndarray'>
len(X)=569
len(y)=569
X.shape=(569, 30)
y.shape=(569,)
One X sample: X[0]=array([1.799e+01, 1.038e+01, 1.228e+02, 1.001e+03, 1.184e-01, 2.776e-01,
       3.001e-01, 1.471e-01, 2.419e-01, 7.871e-02, 1.095e+00, 9.053e-01,
       8.589e+00, 1.534e+02, 6.399e-03, 4.904e-02, 5.373e-02, 1.587e-02,
       3.003e-02, 6.193e-03, 2.538e+01, 1.733e+01, 1.846e+02, 2.019e+03,
       1.622e-01, 6.656e-01, 7.119e-01, 2.654e-01, 4.601e-01, 1.189e-01])
One Y sample: y[0]=0


In [33]:
print(f"{X.min()=}")
print(f"{X.max()=}")
print(f"{X.std()=}")

X.min()=0.0
X.max()=4254.0
X.std()=228.29740508276657


In [34]:
print(f"{X[:,0]=}")
print(f"{X[:,0].min()=}")
print(f"{X[:,0].max()=}")

X[:,0]=array([17.99 , 20.57 , 19.69 , 11.42 , 20.29 , 12.45 , 18.25 , 13.71 ,
       13.   , 12.46 , 16.02 , 15.78 , 19.17 , 15.85 , 13.73 , 14.54 ,
       14.68 , 16.13 , 19.81 , 13.54 , 13.08 ,  9.504, 15.34 , 21.16 ,
       16.65 , 17.14 , 14.58 , 18.61 , 15.3  , 17.57 , 18.63 , 11.84 ,
       17.02 , 19.27 , 16.13 , 16.74 , 14.25 , 13.03 , 14.99 , 13.48 ,
       13.44 , 10.95 , 19.07 , 13.28 , 13.17 , 18.65 ,  8.196, 13.17 ,
       12.05 , 13.49 , 11.76 , 13.64 , 11.94 , 18.22 , 15.1  , 11.52 ,
       19.21 , 14.71 , 13.05 ,  8.618, 10.17 ,  8.598, 14.25 ,  9.173,
       12.68 , 14.78 ,  9.465, 11.31 ,  9.029, 12.78 , 18.94 ,  8.888,
       17.2  , 13.8  , 12.31 , 16.07 , 13.53 , 18.05 , 20.18 , 12.86 ,
       11.45 , 13.34 , 25.22 , 19.1  , 12.   , 18.46 , 14.48 , 19.02 ,
       12.36 , 14.64 , 14.62 , 15.37 , 13.27 , 13.45 , 15.06 , 20.26 ,
       12.18 ,  9.787, 11.6  , 14.42 , 13.61 ,  6.981, 12.18 ,  9.876,
       10.49 , 13.11 , 11.64 , 12.36 , 22.27 , 11.34 ,  9.777, 12.63 ,

In [35]:
print(f"Unique values in targets: {np.unique(y)=}")

# Two values: So binary classification

Unique values in targets: np.unique(y)=array([0, 1])


In [36]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)

print(f"{len(X_train)=} {len(X_test)=} {len(y_train)= } {len(y_test)=}")

len(X_train)=455 len(X_test)=114 len(y_train)= 455 len(y_test)=114


In [37]:
# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_scaled.mean(axis=0)
X_train_scaled.std(axis=0)


array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [38]:
# Create a dataset
X_train_tensors = torch.from_numpy(X_train_scaled).type(torch.float32)
X_test_tensors = torch.from_numpy(X_test_scaled).type(torch.float32)
y_train_tensors = torch.from_numpy(y_train).type(torch.float32)
y_test_tensors = torch.from_numpy(y_test).type(torch.float32)
print(f"{X_train_tensors.shape}")
print(f"{y_train_tensors.shape}")
print(f"{X_test_tensors.shape}")
print(f"{y_test_tensors.shape}")


torch.Size([455, 30])
torch.Size([455])
torch.Size([114, 30])
torch.Size([114])


In [39]:
# Need to reshape y tensors
y_train_tensors = y_train_tensors.view(y_train_tensors.shape[0], 1)
y_test_tensors = y_test_tensors.view(y_test_tensors.shape[0], 1)

print(f"{y_train_tensors.shape}")
print(f"{y_test_tensors.shape}")


torch.Size([455, 1])
torch.Size([114, 1])


In [40]:
# Create a dataset to feed the dataloader

from torch.utils.data import Dataset, DataLoader

class BCTrainDataset(Dataset):
    
    def __init__(self, X_train, y_train) -> None:
        super().__init__()
        
        self.X = X_train
        self.y = y_train
        
    def __getitem__(self, index):
        return self.X[index], self.y[index]
    
    def __len__(self) -> int:
        return len(self.X)
    
class BCTestDataset(Dataset):
    
    def __init__(self, X_test) -> None:
        self.X = X_test
        
    def __getitem__(self, index):
        return self.X[index]
    
    def __len__(self) -> int:
        return len(self.X)

In [41]:
train_dataset = BCTrainDataset(X_train=X_train_tensors, y_train=y_train_tensors)
test_dataset = BCTestDataset(X_test=X_test_tensors)



In [42]:
BATCH_SIZE = 32
torch.manual_seed(RANDOM_SEED)
train_data_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_data_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

X_batch, y_batch = next(iter(train_data_loader))
X_test_batch = next(iter(test_data_loader))

print(f"{X_batch[0]=}, {y_batch[0]=}")
print(f"{X_test_batch[0]=}")


X_batch[0]=tensor([-0.2379, -0.5158, -0.3008, -0.3019, -0.8297, -1.0324, -0.6612, -0.6228,
        -1.1325, -0.8412, -0.3841, -0.5811, -0.3799, -0.3196, -0.3097, -0.7936,
        -0.2879, -0.4291, -0.6933, -0.4710, -0.2246, -0.3330, -0.2389, -0.2930,
        -0.1919, -0.7696, -0.4391, -0.2566, -0.6183, -0.4082]), y_batch[0]=tensor([1.])
X_test_batch[0]=tensor([-0.1095,  0.3644, -0.0614, -0.2097,  1.5793,  1.1909,  0.0815,  0.3131,
         1.4004,  1.6775,  0.6657,  0.2952,  0.5222,  0.2457,  0.6413,  0.2974,
        -0.2135,  0.4765, -0.6886,  0.6258,  0.1916,  0.4080,  0.1286,  0.0576,
         1.4571,  0.7376, -0.0157,  0.6553,  0.4467,  1.7012])


In [43]:
# Build the network here for logistic regression

class BreastCancerNN(nn.Module):
    
    def __init__(self, in_features: int, out_features: int) -> None:
        
        super().__init__()
        
        self.layer_1 = nn.Linear(in_features=in_features, out_features=out_features)
        
    def forward(self, x):
        
        return self.layer_1(x)

In [44]:
model = BreastCancerNN(in_features=X_batch.shape[1], out_features=1)
model

BreastCancerNN(
  (layer_1): Linear(in_features=30, out_features=1, bias=True)
)

In [45]:
loss_fn = nn.BCEWithLogitsLoss() # Because we are in binary classification mode
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [46]:
def accuracy(y_pred, y):
    
    return torch.eq(torch.round(torch.sigmoid(y_pred)), y).sum().item() * 100 / len(y)

In [47]:
print(accuracy(torch.tensor([-0.111, -0.222]), torch.tensor([0, 0])))

100.0


In [48]:
EPOCHS = 50
torch.manual_seed(RANDOM_SEED)

model.train()
for epoch in range(EPOCHS):
    
    epoch_loss, epoch_acc = 0, 0
    
    for X, y in train_data_loader:
        
        y_logits = model(X)
        
        loss = loss_fn(y_logits, y)
        
        epoch_loss += loss.item()
        epoch_acc += accuracy(y_logits, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    epoch_loss /= len(train_data_loader)
    epoch_acc /= len(train_data_loader)
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch} | loss: {epoch_loss} | accuracy: {epoch_acc:.2f}%")
        
        
        

Epoch 0 | loss: 0.7556765953699748 | accuracy: 41.43%


Epoch 10 | loss: 0.3036485105752945 | accuracy: 94.17%
Epoch 20 | loss: 0.20158259868621825 | accuracy: 95.62%
Epoch 30 | loss: 0.1606461967031161 | accuracy: 97.29%
Epoch 40 | loss: 0.13255473027626674 | accuracy: 97.29%


In [86]:
model.eval()

with torch.inference_mode():
    
    predictions = []
    for batch in test_data_loader:
        logits = model(batch)
        
        predictions.append(torch.round(torch.sigmoid(logits)).squeeze())
        
    predictions = torch.cat(predictions)
    print(predictions.squeeze())
    print(y_test_tensors.squeeze())
    
print(f"Test accuracy: {torch.eq(predictions.squeeze(), y_test_tensors.squeeze()).sum().item() * 100 / len(y_test_tensors) : .2f}%")

tensor([0., 1., 1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 1.,
        0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0.,
        0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1.,
        0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1., 1.,
        1., 0., 1., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1.,
        1., 1., 0., 1., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1.,
        1., 1., 0., 1., 0., 1.])
tensor([0., 0., 1., 1., 1., 1., 0., 1., 0., 1., 1., 0., 1., 0., 0., 1., 1., 1.,
        0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0.,
        0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1.,
        0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 1., 1., 1., 0., 0., 1., 1.,
        1., 0., 1., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1.,
        1., 1., 0., 1., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1.,
       

In [87]:
MODEL_PATH = "BreastCancerNN.pth"
SCALER_PATH = "BreastCancer_Scaler.bin"

import joblib

torch.save(model.state_dict(), MODEL_PATH)
joblib.dump(scaler, SCALER_PATH)

['BreastCancer_Scaler.bin']