In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [None]:
# Read the CSV from its URL and discard the 'id' and 'Unnamed: 32' columns
DATA_URL = 'https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv'
df = pd.read_csv(DATA_URL).drop(columns=['id', 'Unnamed: 32'])


In [None]:
# The first column supplies the targets; every remaining column is a feature.
label_column = df.iloc[:, 0]
feature_columns = df.iloc[:, 1:]
X, y = feature_columns.values, label_column.values

In [None]:

# Learn the set of class labels present in y, then replace each label with its integer code
encoder = LabelEncoder()
encoder.fit(y)
y = encoder.transform(y)


In [None]:
# Hold out a fixed fraction of the samples for testing; the fixed seed keeps the split repeatable
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)


In [None]:
# Fit the scaler on the training split only, then apply the same transform to both splits
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [None]:
# Creating a custom Dataset class
class CancerDataset(Dataset):
    """In-memory dataset that pairs feature rows with their targets.

    Args:
        X: Array-like of features, one row per sample. Stored as a
            ``float32`` tensor in ``self.X``.
        y: One-dimensional array-like of targets, one entry per sample.
            Stored in ``self.y`` as a ``float32`` tensor with an extra
            trailing dimension, i.e. shape ``(len(y), 1)``.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of samples.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)  # Make y a column tensor

        # Fail fast: __len__ is driven by y alone, so a mismatch would otherwise
        # surface later as an IndexError, or as silently ignored feature rows.
        if len(self.X) != len(self.y):
            raise ValueError(
                f'X and y must contain the same number of samples, '
                f'got {len(self.X)} and {len(self.y)}'
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]


In [None]:
# Wrap the training and test splits in CancerDataset instances
train_dataset = CancerDataset(X=X_train, y=y_train)
test_dataset = CancerDataset(X=X_test, y=y_test)

In [None]:
# Mini-batch loaders: the training loader shuffles, the test loader keeps dataset order
BATCH_SIZE = 32
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [None]:
# Single-layer network: one linear unit followed by a sigmoid
class MySimpleNN(nn.Module):
    """A linear layer with a single output, passed through a sigmoid.

    Args:
        num_features: Number of input features per sample.
    """

    def __init__(self, num_features):
        super().__init__()
        self.linear = nn.Linear(in_features=num_features, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the sigmoid of the result."""
        logits = self.linear(x)
        return self.sigmoid(logits)


In [None]:
# Model, loss function, and optimizer
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation (and the shuffled batch order of a loader that draws from the
# global RNG) is repeatable on a fresh "Restart & Run All".
torch.manual_seed(42)

model = MySimpleNN(X_train.shape[1])  # one input per feature column
criterion = nn.BCELoss()  # expects probabilities, which the model's final sigmoid produces
optimizer = optim.SGD(model.parameters(), lr=0.1)


In [None]:
# Training loop
# Each epoch makes one full pass over train_loader and reports the mean loss
# over all samples seen in that pass. Reporting only the final mini-batch's
# loss is noisy (the last batch may be small) and hides the real trend.
epochs = 25
for epoch in range(epochs):
    model.train()  # make sure the model is in training mode for this pass
    running_loss = 0.0
    samples_seen = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        y_pred = model(batch_X)
        loss = criterion(y_pred, batch_y)
        loss.backward()
        optimizer.step()

        # criterion is nn.BCELoss() with its default mean reduction, so weight
        # the batch mean by the batch size to recover the per-sample sum.
        n_batch = batch_y.size(0)
        running_loss += loss.item() * n_batch
        samples_seen += n_batch
    epoch_loss = running_loss / samples_seen
    print(f'Epoch {epoch+1}, Loss: {epoch_loss}')


Epoch 1, Loss: 0.43012887239456177
Epoch 2, Loss: 0.4478466808795929
Epoch 3, Loss: 0.15834148228168488
Epoch 4, Loss: 0.08683162927627563
Epoch 5, Loss: 0.18913865089416504
Epoch 6, Loss: 0.14486488699913025
Epoch 7, Loss: 0.045602671802043915
Epoch 8, Loss: 0.09331979602575302
Epoch 9, Loss: 0.053154390305280685
Epoch 10, Loss: 0.18181012570858002
Epoch 11, Loss: 0.04452797770500183
Epoch 12, Loss: 0.015227101743221283
Epoch 13, Loss: 0.14046086370944977
Epoch 14, Loss: 0.1569983810186386
Epoch 15, Loss: 0.008774095214903355
Epoch 16, Loss: 0.12053973972797394
Epoch 17, Loss: 0.09193510562181473
Epoch 18, Loss: 0.06573557108640671
Epoch 19, Loss: 0.11727438867092133
Epoch 20, Loss: 0.0754910483956337
Epoch 21, Loss: 0.10087122768163681
Epoch 22, Loss: 0.092796191573143
Epoch 23, Loss: 0.11075679212808609
Epoch 24, Loss: 0.012923650443553925
Epoch 25, Loss: 0.011564143002033234


In [None]:
# Evaluation
# Measure classification accuracy on the test loader.
model.eval()  # switch to inference mode before scoring (no-op for layers without train/eval behaviour)
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for scoring
    for batch_X, batch_y in test_loader:
        y_pred = model(batch_X)
        # The model outputs a sigmoid probability; threshold at 0.5 to get the predicted class
        y_pred_class = (y_pred > 0.5).float()
        correct += (y_pred_class == batch_y).sum().item()
        total += batch_y.size(0)
accuracy = correct / total
print(f'Accuracy: {accuracy}')


Accuracy: 0.9824561403508771
