In [1]:
import numpy as np
import pandas as pd

# Step 1: Build a small synthetic dataset with binary labels (spam = 1, ham = 0)
np.random.seed(42)  # fixed seed so every run draws the same samples
X = np.random.standard_normal(size=(100, 1))  # 100 samples, one feature each
y = np.greater(X.ravel(), 0).astype(int)  # label is 1 where the feature is positive, else 0

# Step 2: Start both model parameters (weight and bias) at zero
w, b = 0.0, 0.0

# Adam hyperparameters
alpha, epsilon = 0.01, 1e-8  # step size / small constant added to the update denominator
beta1, beta2 = 0.9, 0.999    # decay rates of the first / second moment averages

# Running first (m) and second (v) moment estimates, one pair per parameter
m_w = v_w = 0.0
m_b = v_b = 0.0

# Per-iteration records logged by the optimization loop are collected here
history = list()

# Sigmoid function
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    The direct formula computes exp(-z), which overflows to inf (and emits a
    RuntimeWarning) for large negative z. Here only exp(-|z|) is evaluated,
    which always lies in [0, 1], and the algebraically equivalent form
    exp(z) / (1 + exp(z)) is used for negative inputs.

    Args:
        z: scalar or array-like of real numbers.

    Returns:
        Element-wise sigmoid with the same shape as ``z``; a scalar input
        yields a NumPy scalar.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # never overflows: the exponent is <= 0
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return out[()]  # unwrap 0-d results so scalar input still returns a scalar

# Step 3: Adam optimization loop
def _adam_update(param, grad, m, v, t, lr, b1, b2, eps):
    """Apply one Adam step to a single scalar parameter.

    Args:
        param: current parameter value.
        grad: gradient of the loss with respect to ``param``.
        m: running first-moment estimate (mean of gradients) from the previous step.
        v: running second-moment estimate (mean of squared gradients) from the previous step.
        t: 1-based iteration number, used for bias correction.
        lr: step size.
        b1: exponential decay rate for ``m``.
        b2: exponential decay rate for ``v``.
        eps: small constant added to the denominator of the update.

    Returns:
        Tuple ``(param, m, v, m_hat, v_hat)``: the updated parameter, the updated
        raw moments, and their bias-corrected versions.
    """
    m = b1 * m + (1 - b1) * grad
    v = b2 * v + (1 - b2) * (grad ** 2)
    # The moments start at zero, so early averages are biased toward zero;
    # dividing by (1 - decay ** t) compensates for that.
    m_hat = m / (1 - b1 ** t)
    v_hat = v / (1 - b2 ** t)
    param = param - lr * m_hat / (np.sqrt(v_hat) + eps)
    return param, m, v, m_hat, v_hat


n_iterations = 100
n_logged = 10  # only this many final iterations are recorded in `history`
x_flat = X.flatten()  # loop-invariant 1-D copy of the single feature column

for t in range(1, n_iterations + 1):
    # Forward pass
    z = X @ np.array([[w]]) + b
    y_pred = sigmoid(z).flatten()

    # Gradients for logistic regression: mean of (prediction - label) times the input
    error = y_pred - y
    grad_w = np.mean(error * x_flat)
    grad_b = np.mean(error)

    # Both gradients are computed before either parameter changes, then each
    # parameter takes the same Adam step with its own moment estimates.
    w, m_w, v_w, m_w_hat, v_w_hat = _adam_update(
        w, grad_w, m_w, v_w, t, alpha, beta1, beta2, epsilon
    )
    b, m_b, v_b, m_b_hat, v_b_hat = _adam_update(
        b, grad_b, m_b, v_b, t, alpha, beta1, beta2, epsilon
    )

    # Store the last `n_logged` steps (weight-related quantities only)
    if t > n_iterations - n_logged:
        history.append({
            't': t,
            'm_w': m_w,
            'v_w': v_w,
            'm_w_hat': m_w_hat,
            'v_w_hat': v_w_hat,
            'w': w
        })

# Convert to DataFrame for display
df_history = pd.DataFrame(history)
print(df_history.tail(n_logged))


     t       m_w       v_w   m_w_hat   v_w_hat         w
0   91 -0.220538  0.006760 -0.220553  0.077676  0.826613
1   92 -0.219304  0.006796 -0.219317  0.077285  0.834502
2   93 -0.218080  0.006832 -0.218092  0.076898  0.842367
3   94 -0.216866  0.006868 -0.216877  0.076514  0.850207
4   95 -0.215663  0.006903 -0.215673  0.076133  0.858023
5   96 -0.214470  0.006938 -0.214479  0.075755  0.865816
6   97 -0.213287  0.006972 -0.213295  0.075380  0.873585
7   98 -0.212114  0.007005 -0.212121  0.075008  0.881330
8   99 -0.210951  0.007039 -0.210957  0.074638  0.889052
9  100 -0.209798  0.007071 -0.209803  0.074272  0.896750


In [2]:
# Evaluate the trained parameters on the same samples used for training:
# recompute the forward pass with the final w and b.
z_final = X @ np.array([[w]]) + b
y_pred_final = sigmoid(z_final).flatten()
# Predict class 1 when the predicted probability is at least 0.5.
y_pred_class = (y_pred_final >= 0.5).astype(int)

# Fraction of samples whose predicted class equals the label.
accuracy = np.mean(y_pred_class == y)
print(f" Final Accuracy: {accuracy * 100:.2f}%")

# Per-sample table: feature value, true label, rounded probability, predicted class.
df_result = pd.DataFrame({
    "Feature": X.flatten(),
    "Actual": y,
    "Predicted_Prob": y_pred_final.round(3),
    "Predicted_Class": y_pred_class
})

print("\n📄 Sample Predictions:")
print(df_result.head(10))


 Final Accuracy: 95.00%

📄 Sample Predictions:
    Feature  Actual  Predicted_Prob  Predicted_Class
0  0.496714       1           0.586                1
1 -0.138264       0           0.445                0
2  0.647689       1           0.619                1
3  1.523030       1           0.781                1
4 -0.234153       0           0.424                0
5 -0.234137       0           0.424                0
6  1.579213       1           0.789                1
7  0.767435       1           0.644                1
8 -0.469474       0           0.374                0
9  0.542560       1           0.596                1
