In [16]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import roc_auc_score, accuracy_score


In [17]:
# Load and prepare data (same preprocessing as before for a fair comparison).
print("Loading MNIST data...")
X, y = fetch_openml(
    name='mnist_784',
    version=1,
    as_frame=False,   # plain NumPy arrays instead of a DataFrame
    return_X_y=True,
)

# Labels are cast to integers.
y = y.astype(int)

# Rescale features: a raw value of 0 maps to -1 and 255 maps to +1.
X = 2 * ((X / 255.) - .5)

Loading MNIST data...


In [18]:
# 70/30 split, stratified on the label so class proportions match in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=123,
)

# One-hot encode the labels (used as Keras targets and for the AUC computation).
# The encoder is fitted on the training labels only and then applied to both splits.
train_label_column = y_train.reshape(-1, 1)
test_label_column = y_test.reshape(-1, 1)

enc = OneHotEncoder()
enc.fit(train_label_column)
y_train_onehot = enc.transform(train_label_column).toarray()
y_test_onehot = enc.transform(test_label_column).toarray()

In [19]:
# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so
# that the random weight initialisation is repeatable on a fresh kernel.
# NOTE(review): bit-exact GPU runs may additionally need
# tf.config.experimental.enable_op_determinism() -- confirm if required.
tf.keras.utils.set_random_seed(123)

# Fully connected classifier: 784 inputs -> 500 -> 500 -> 10 outputs.
model = tf.keras.models.Sequential([
    # Explicit input spec. Passing `input_shape=` to the first Dense layer is
    # deprecated in current Keras and emits a UserWarning.
    tf.keras.Input(shape=(784,)),

    # Hidden Layer 1
    tf.keras.layers.Dense(500, activation='sigmoid'),

    # Hidden Layer 2
    tf.keras.layers.Dense(500, activation='sigmoid'),

    # Output Layer: 'softmax' over the 10 classes
    tf.keras.layers.Dense(10, activation='softmax'),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [20]:
# 4. Compile
# Plain SGD with a fixed step size of 0.1.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

# NOTE(review): mean-squared error on a softmax output is unusual
# (categorical cross-entropy is the common pairing); presumably kept to mirror
# the earlier model this notebook is compared against -- confirm.
model.compile(
    loss='mse',
    optimizer=optimizer,
    metrics=['accuracy'],
)


In [21]:
# 5. Train
print("Training Keras model...")

# 20 epochs in mini-batches of 100; 10% of the training rows are held out for
# validation. `history` keeps the object returned by fit().
history = model.fit(
    x=X_train,
    y=y_train_onehot,
    batch_size=100,
    epochs=20,
    validation_split=0.1,
    verbose=1,
)

Training Keras model...
Epoch 1/20
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.1290 - loss: 0.0904 - val_accuracy: 0.2724 - val_loss: 0.0891
Epoch 2/20
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.1943 - loss: 0.0889 - val_accuracy: 0.3067 - val_loss: 0.0881
Epoch 3/20
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.2784 - loss: 0.0878 - val_accuracy: 0.3594 - val_loss: 0.0870
Epoch 4/20
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - accuracy: 0.3063 - loss: 0.0866 - val_accuracy: 0.3308 - val_loss: 0.0855
Epoch 5/20
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - accuracy: 0.3475 - loss: 0.0849 - val_accuracy: 0.3206 - val_loss: 0.0834
Epoch 6/20
[1m441/441[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - accuracy: 0.3579 - loss: 0.0825 - val_accuracy: 0.3765 - val_loss: 0.08

In [22]:
# 6. Evaluate on the held-out test split
print("\nEvaluating performance...")

# Per-class scores from the softmax layer; the predicted class is the arg-max.
y_pred_proba = model.predict(X_test)
y_pred_class = y_pred_proba.argmax(axis=1)

# Macro-averaged AUC is computed against the one-hot targets; accuracy is
# computed against the integer labels.
macro_auc = roc_auc_score(
    y_test_onehot,
    y_pred_proba,
    average='macro',
    multi_class='ovr',
)
acc = accuracy_score(y_test, y_pred_class)

print(f"Test Accuracy: {acc*100:.2f}%")
print(f"Macro AUC: {macro_auc*100:.2f}")


Evaluating performance...
[1m657/657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
Test Accuracy: 85.06%
Macro AUC: 98.19
