# Breast Cancer Prediction

Import dependencies

In [1]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim

In [2]:
torch.__version__

'2.2.1+cu121'

creating a system agnostic

In [3]:
device = 'gpu' if torch.cuda.is_available() else 'cpu'

In [4]:
device

'gpu'

In [5]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
print(f'Using device: {device}')

Using device: cuda


**Data collection and Preprocessing**

In [9]:
# load the breast cancer datasets
df = load_breast_cancer()
df

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0

In [10]:
df.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [12]:
X=df.data
y=df.target

In [13]:
# split the data into training and testing set
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((455, 30), (114, 30), (455,), (114,))

In [14]:
#standardized the data
scaler = StandardScaler()
X_train_scaled= scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [15]:
type(X_train_scaled)

numpy.ndarray

In [17]:
# convert data to Pytorch Tensors and move it to GPU
X_train_scaled = torch.tensor(X_train_scaled,dtype=torch.float32).to(device)
X_test_scaled = torch.tensor(X_test_scaled,dtype=torch.float32).to(device)
y_train = torch.tensor(y_train,dtype=torch.float32).to(device)
y_test = torch.tensor(y_test,dtype=torch.float32).to(device)


  X_train_scaled = torch.tensor(X_train_scaled,dtype=torch.float32).to(device)
  X_test_scaled = torch.tensor(X_test_scaled,dtype=torch.float32).to(device)
  y_train = torch.tensor(y_train,dtype=torch.float32).to(device)


In [20]:
X_train_scaled.shape

torch.Size([455, 30])

**Neural Network Architecture**

In [23]:
# define the neural network architecture

class NeuralNet(nn.Module):

  def __init__(self,input_size,hidden_size,output_size):
    super(NeuralNet,self).__init__()
    self.fcl=nn.Linear(in_features=input_size,out_features=hidden_size)
    self.relu = nn.ReLU()
    self.rc2=nn.Linear(in_features=hidden_size,out_features=output_size)
    self.sigmoid =nn.Sigmoid()

  def forward(self,x):
    out = self.fcl(x)
    out = self.relu(out)
    out = self.rc2(out)
    out =self.sigmoid(out)
    return out


**Adding loss function and Optimizer**

In [26]:
#define hyperparameter
input_size = X_train_scaled.shape[1]
hidden_size = 64
output_size=1
learning_rate =0.001
num_epochs = 100

In [27]:
# create a model instance
model0 = NeuralNet(input_size,hidden_size,output_size).to(device)
model0

NeuralNet(
  (fcl): Linear(in_features=30, out_features=64, bias=True)
  (relu): ReLU()
  (rc2): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [28]:
# define loss and optimizer
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model0.parameters(),lr=learning_rate)

# Model Training loop

In [34]:
epoch_count = []
train_loss_values = []
test_loss_values =[]
for epoch in range(num_epochs):

  model0.train()

  # forward pass
  y_pred = model0(X_train_scaled).squeeze()

  # loss function
  loss_train  = loss_fn(y_pred,y_train)

  # zero grad
  optimizer.zero_grad()

  # back propagation
  loss_train.backward()

  # optim step
  optimizer.step()

  # evaluate mode
  model0.eval()
  with torch.inference_mode():
    y_pred_test =model0(X_test_scaled).squeeze()

    # calculate loss
    test_loss = loss_fn(y_pred_test,y_test)

    if epoch%10==0:

      print(f'Epoch: {epoch} | Train Loss: {loss_train} | Test Loss: {test_loss}')



Epoch: 0 | Train Loss: 0.11080579459667206 | Test Loss: 0.09593657404184341
Epoch: 10 | Train Loss: 0.10252940654754639 | Test Loss: 0.0893140435218811
Epoch: 20 | Train Loss: 0.09554945677518845 | Test Loss: 0.08394574373960495
Epoch: 30 | Train Loss: 0.0895955041050911 | Test Loss: 0.07959111779928207
Epoch: 40 | Train Loss: 0.0844506323337555 | Test Loss: 0.07599398493766785
Epoch: 50 | Train Loss: 0.07992800325155258 | Test Loss: 0.07298333197832108
Epoch: 60 | Train Loss: 0.0759580135345459 | Test Loss: 0.07043557614088058
Epoch: 70 | Train Loss: 0.0724836215376854 | Test Loss: 0.06825747340917587
Epoch: 80 | Train Loss: 0.0694054663181305 | Test Loss: 0.0664316788315773
Epoch: 90 | Train Loss: 0.06662949174642563 | Test Loss: 0.06490281224250793
