In [None]:
import torch
import torch.nn as nn
from sklearn.datasets import make_classification

In [None]:
# Tiny, reproducible two-class toy problem: 10 samples with 2 features,
# both informative and none redundant.
X, y = make_classification(
    n_samples=10, n_classes=2,
    n_features=2, n_informative=2, n_redundant=0,
    random_state=42,
)

In [None]:
# Inspect the generated feature matrix (10 samples x 2 features).
X

array([[ 1.06833894, -0.97007347],
       [-1.14021544, -0.83879234],
       [-2.8953973 ,  1.97686236],
       [-0.72063436, -0.96059253],
       [-1.96287438, -0.99225135],
       [-0.9382051 , -0.54304815],
       [ 1.72725924, -1.18582677],
       [ 1.77736657,  1.51157598],
       [ 1.89969252,  0.83444483],
       [-0.58723065, -1.97171753]])

In [None]:
# Inspect the generated class labels (one 0/1 label per sample).
y

array([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [None]:
# Convert the data to float32 PyTorch tensors.
# `torch.as_tensor` is used instead of `torch.tensor` so that re-running this
# cell (when X and y are already tensors) is a harmless no-op rather than a
# copy-construct that triggers a UserWarning. On the first run the dtype change
# still forces a fresh float32 copy, so the result is the same as before.
X = torch.as_tensor(X, dtype=torch.float32)
y = torch.as_tensor(y, dtype=torch.float32)

In [None]:
from torch.utils.data import Dataset, DataLoader

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        features: Indexable, sized collection of samples (one sample per
            leading index), e.g. a 2-D tensor.
        labels: Indexable, sized collection of targets aligned with
            ``features``.

    Raises:
        ValueError: If ``features`` and ``labels`` do not have the same length.
    """

    def __init__(self, features, labels):
        # Fail fast on misaligned inputs; otherwise the mismatch only shows up
        # as an IndexError mid-epoch, or trailing labels are silently ignored.
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        # len() equals shape[0] for tensors/arrays and also works for plain sequences.
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        # custom transformations here
        return self.features[idx], self.labels[idx]

In [None]:
# Wrap the toy tensors so a DataLoader can index them sample by sample.
dataset = CustomDataset(features=X, labels=y)

In [None]:
# Serve the toy dataset in shuffled mini-batches of two samples.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=2,
    shuffle=True,
)

In [None]:
# One full pass over the loader: show each mini-batch's features and labels.
for batch in dataloader:
    batch_features, batch_labels = batch
    print(batch_features, batch_labels)

tensor([[ 1.7774,  1.5116],
        [-1.1402, -0.8388]]) tensor([1., 0.])
tensor([[-0.5872, -1.9717],
        [ 1.0683, -0.9701]]) tensor([0., 1.])
tensor([[-0.9382, -0.5430],
        [-0.7206, -0.9606]]) tensor([1., 0.])
tensor([[-1.9629, -0.9923],
        [-2.8954,  1.9769]]) tensor([0., 0.])
tensor([[ 1.7273, -1.1858],
        [ 1.8997,  0.8344]]) tensor([1., 1.])


## Testing on Previously Used Data

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the CSV directly from the GitHub-hosted copy (requires network access).
df = pd.read_csv(
    "https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv"
)

In [None]:
# Remove the two columns that are not used for modelling.
# NOTE(review): 'id' is presumably a row identifier and 'Unnamed: 32' an empty
# trailing column produced by the CSV layout -- confirm against the raw file.
#
# The drop is written as a re-assignment with errors='ignore' (instead of
# inplace=True) so that re-running this cell after the columns are already gone
# does not raise a KeyError.
df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

In [None]:
# Positional split: the first remaining column becomes the target `y`,
# every column after it becomes a feature in `X`.
X, y = df.iloc[:, 1:], df.iloc[:, 0]

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test information is used in fitting.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit the label -> integer mapping on the training labels, then encode both
# splits with that same mapping.
encoder = LabelEncoder().fit(y_train)
y_train = encoder.transform(y_train)
y_test = encoder.transform(y_test)

In [None]:
# Turn all four splits into float32 tensors in one pass; the order of the
# targets on the left matches the order of the sources on the right.
X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, X_test, y_train, y_test)
)

## Dataset using CustomDataset and DataLoader

In [None]:
from torch.utils.data import Dataset, DataLoader

In [None]:
# NOTE: this re-declares the CustomDataset class defined earlier in the
# notebook; from this cell onward this definition is the one in effect.
class CustomDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        features: Indexable, sized collection of samples (one sample per
            leading index), e.g. a 2-D tensor.
        labels: Indexable, sized collection of targets aligned with
            ``features``.

    Raises:
        ValueError: If ``features`` and ``labels`` do not have the same length.
    """

    def __init__(self, features, labels):
        # Fail fast on misaligned inputs; otherwise the mismatch only shows up
        # as an IndexError mid-epoch, or trailing labels are silently ignored.
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        # len() equals shape[0] for tensors/arrays and also works for plain sequences.
        return len(self.features)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at position ``index``."""
        return self.features[index], self.labels[index]

In [None]:
# Wrap each split's tensors in a dataset so they can be batched by a DataLoader.
train_dataset = CustomDataset(features=X_train_tensor, labels=y_train_tensor)
test_dataset = CustomDataset(features=X_test_tensor, labels=y_test_tensor)

In [None]:
# Shuffle only the training batches. The test loader keeps a fixed order:
# shuffling adds nothing at evaluation time and would make any metric that
# depends on batch composition vary from run to run for the same model.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

## Model

In [None]:
class myNN(nn.Module):
    """Single linear unit followed by a sigmoid activation.

    Args:
        num_features: Number of input features per sample.
    """

    def __init__(self, num_features):
        super().__init__()
        self.linear = nn.Linear(in_features=num_features, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, features):
        """Apply the linear layer, then squash the result with the sigmoid."""
        logits = self.linear(features)
        return self.sigmoid(logits)

In [None]:
learning_rate = 0.1
epochs = 20

# Seed torch's global RNG before the model is built so that the weight
# initialisation is repeatable across "Restart & Run All".
# NOTE(review): a DataLoader created with shuffle=True and no explicit
# generator should also draw its shuffle order from this RNG -- confirm.
torch.manual_seed(42)

model = myNN(X_train_tensor.shape[1])

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
loss_function = nn.BCELoss()

for epoch in range(epochs):
    # Track the sample-weighted loss over the whole epoch. Printing only the
    # last mini-batch's loss is very noisy and says little about progress.
    running_loss = 0.0
    samples_seen = 0

    for batch_features, batch_labels in train_loader:
        y_pred = model(batch_features)
        # The model outputs shape (batch, 1); reshape labels to match it.
        loss = loss_function(y_pred, batch_labels.view(-1, 1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # BCELoss averages over the batch by default, so weight each batch's
        # loss by its size; the final batch may be smaller than the rest.
        n_in_batch = batch_labels.size(0)
        running_loss += loss.item() * n_in_batch
        samples_seen += n_in_batch

    # Guard against an empty loader instead of failing on an undefined `loss`.
    epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
    print(f"epoch = {epoch+1}, loss = {epoch_loss}")

epoch = 1, loss = 0.35184934735298157
epoch = 2, loss = 0.24164877831935883
epoch = 3, loss = 0.22913041710853577
epoch = 4, loss = 0.07281435281038284
epoch = 5, loss = 0.08481625467538834
epoch = 6, loss = 0.08701272308826447
epoch = 7, loss = 0.20078971982002258
epoch = 8, loss = 0.31781837344169617
epoch = 9, loss = 0.36114391684532166
epoch = 10, loss = 0.20244203507900238
epoch = 11, loss = 0.012497988529503345
epoch = 12, loss = 0.06532870978116989
epoch = 13, loss = 0.5755361318588257
epoch = 14, loss = 0.18403908610343933
epoch = 15, loss = 0.047894593328237534
epoch = 16, loss = 0.047666944563388824
epoch = 17, loss = 0.007148277014493942
epoch = 18, loss = 0.39368611574172974
epoch = 19, loss = 0.0015080730663612485
epoch = 20, loss = 0.0017784826923161745


## Model Evaluation

In [None]:
# Model evaluation using test_loader.
# Accuracy is computed as (correct predictions) / (total samples) over the
# whole test set. Averaging per-batch accuracies instead would give the
# smaller final batch the same weight as a full batch and bias the result.
model.eval()
correct = 0
total = 0

with torch.no_grad():  # no gradients are needed for inference
    for batch_features, batch_labels in test_loader:
        y_pred = model(batch_features)
        # Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions.
        y_pred = (y_pred > 0.5).float()
        # Flatten (batch, 1) predictions to (batch,) before comparing with labels.
        correct += (y_pred.view(-1) == batch_labels).sum().item()
        total += batch_labels.numel()

# Guard against an empty loader rather than dividing by zero.
overall_acc = correct / total if total else float("nan")
print(f"Overall Accuracy: {overall_acc:.3f}")

Overall Accuracy: 0.992
