In [2]:
%pip install -U scikit-learn
%pip install -U matplotlib


Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp313-cp313-win_amd64.whl.metadata (11 kB)
Downloading scikit_learn-1.7.2-cp313-cp313-win_amd64.whl (8.7 MB)
   ---------------------------------------- 0.0/8.7 MB ? eta -:--:--
   ---------------------------------------  8.7/8.7 MB 51.0 MB/s eta 0:00:01
   ---------------------------------------- 8.7/8.7 MB 42.2 MB/s  0:00:00
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.7.1
    Uninstalling scikit-learn-1.7.1:
      Successfully uninstalled scikit-learn-1.7.1
Successfully installed scikit-learn-1.7.2
Note: you may need to restart the kernel to use updated packages.


  You can safely remove it manually.


Defaulting to user installation because normal site-packages is not writeable
Collecting matplotlib
  Downloading matplotlib-3.10.7-cp313-cp313-win_amd64.whl.metadata (11 kB)
Downloading matplotlib-3.10.7-cp313-cp313-win_amd64.whl (8.1 MB)
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   ---------------------------------------- 8.1/8.1 MB 76.2 MB/s  0:00:00
Installing collected packages: matplotlib
  Attempting uninstall: matplotlib
    Found existing installation: matplotlib 3.10.5
    Uninstalling matplotlib-3.10.5:
      Successfully uninstalled matplotlib-3.10.5
Successfully installed matplotlib-3.10.7
Note: you may need to restart the kernel to use updated packages.


  You can safely remove it manually.


In [5]:
# load the data
from sklearn.datasets import load_breast_cancer
import numpy as np
import matplotlib.pyplot as plt

data = load_breast_cancer()

# Initialize parameters
w = np.random.randn(1)
b = np.random.randn(1)

In [31]:
def predict(X, w, b):
    """Compute linear scores.
    X: (m, n), w: (n, 1) or (n,), b: scalar or (1,)
    Returns: (m, 1) column vector of scores."""
    # ensure w is a column vector for dot product
    w = w.reshape(-1, 1) if w.ndim == 1 else w
    scores = np.dot(X, w) + b
    return scores


In [32]:
print(data.feature_names)
print(data.data)
print(data.target)

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

In [33]:
ones = 0
zeros = 0
for i in range(len(data.target)):
    if data.target[i] == 1:
        ones += 1
    else:
        zeros += 1

print(ones)
print(f"{ones/(len(data.target)) * 100}%")
print(zeros)
print(f"{zeros/(len(data.target)) * 100}%")
print((len(data.target)))

357
62.741652021089635%
212
37.258347978910365%
569


In [34]:
from sklearn.model_selection import train_test_split

X = data.data
y = data.target

# Basic usage
X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2,        # 20% for testing
    random_state=42,      # for reproducibility
    stratify=y           # preserve class distribution (optional)
)

print(f"Training set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")

Training set: 455 samples
Test set: 114 samples


In [35]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

print(sigmoid(0))

0.5


In [36]:
def forward_pass(X, w, b):
    y_pred = predict(X, w, b)
    y_pred_classified = sigmoid(y_pred)
    return y_pred_classified

In [37]:
def compute_loss(y_true, y_pred):
    m = len(y_true)
    epsilon = 1e-15
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    loss = - (1/m) * np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    return loss

In [38]:
def train(X, y, lr=0.01, epochs=1000):
    m, n = X.shape
    
    # Initialize weights and bias
    w = np.zeros((n, 1))
    b = 0
    loss_history = []

    # Make sure y is a column vector
    y = y.reshape(-1, 1)

    for i in range(epochs):
        # Forward pass: predicted probabilities
        y_pred = forward_pass(X, w, b)
        
        # Compute loss
        loss = compute_loss(y, y_pred)
        loss_history.append(loss)

        # Gradients
        dw = (1/m) * np.dot(X.T, (y_pred - y))
        db = (1/m) * np.sum(y_pred - y)

        # Update parameters
        w -= lr * dw
        b -= lr * db

        # Optional: print progress
        if i % 100 == 0:
            print(f"Epoch {i}: loss = {loss:.4f}")

    return w, b, loss_history

def final_predict(X, w, b):
    y_pred = forward_pass(X, w, b)
    return (y_pred >= 0.5).astype(int)

In [39]:
w_final, b_final, loss_history = train(X_train, y_train, lr=0.01, epochs=1000)

# Get binary predictions for the test set using final_predict
y_pred_test = final_predict(X_test, w_final, b_final)
# flatten both arrays to 1D for comparison
y_pred_test = y_pred_test.flatten()
y_test_flat = y_test.flatten()
accuracy = np.mean(y_pred_test == y_test_flat) * 100
print(f"\nFinal Test Accuracy: {accuracy:.2f}%")


Epoch 0: loss = 0.6931
Epoch 100: loss = 9.0391
Epoch 100: loss = 9.0391
Epoch 200: loss = 5.3728
Epoch 300: loss = 3.1123
Epoch 400: loss = 2.8006
Epoch 500: loss = 2.9024
Epoch 600: loss = 2.8559
Epoch 700: loss = 2.8893
Epoch 800: loss = 2.8846
Epoch 900: loss = 9.7835

Final Test Accuracy: 92.98%
Epoch 200: loss = 5.3728
Epoch 300: loss = 3.1123
Epoch 400: loss = 2.8006
Epoch 500: loss = 2.9024
Epoch 600: loss = 2.8559
Epoch 700: loss = 2.8893
Epoch 800: loss = 2.8846
Epoch 900: loss = 9.7835

Final Test Accuracy: 92.98%


  return 1 / (1 + np.exp(-z))
