In [8]:
import tensorflow as tf
import numpy as np

# Run in TF1-style graph mode: the placeholders and the Session used further
# down are graph-mode constructs and need eager execution switched off first.
tf.compat.v1.disable_eager_execution()

# ---------------------------
# 1. Load and Prepare MNIST Data
# ---------------------------
# Returns (images, integer labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten each image into a single row (28x28 => 784) and divide by 255 so the
# features are float32 (in [0, 1], assuming 8-bit pixel intensities).
x_train = x_train.reshape(-1, 784).astype(np.float32) / 255.0
x_test  = x_test.reshape(-1, 784).astype(np.float32) / 255.0

# Convert integer labels to one-hot rows (10 classes) by indexing into an
# identity matrix.  The matrix is built as float32 to match the float32 label
# placeholder; np.eye defaults to float64, which would double the memory used
# by the labels and require a cast every time they are fed.
num_classes = 10
y_train = np.eye(num_classes, dtype=np.float32)[y_train]
y_test  = np.eye(num_classes, dtype=np.float32)[y_test]

# ---------------------------
# 2. Create Placeholders for Inputs and Labels
# ---------------------------
# The leading dimension is None so the same graph accepts a mini-batch during
# training and a whole data split during evaluation.  The column counts are
# taken from the prepared data (flattened image width, num_classes) instead of
# repeating the literals 784 and 10, so they cannot drift out of sync.
x_ph = tf.compat.v1.placeholder(tf.float32, [None, x_train.shape[1]], name="x")
y_ph = tf.compat.v1.placeholder(tf.float32, [None, num_classes], name="y")

# ---------------------------
# 3. Define Network Architecture
# ---------------------------
# One hidden layer followed by a linear output layer.  Weights start as small
# Gaussian noise (stddev 0.1) and biases start at zero.  Layer sizes are
# derived from the prepared data and num_classes rather than hard-coded, so
# they stay consistent with the inputs and the one-hot labels.
input_units = x_train.shape[1]   # width of one flattened image (784 for MNIST)

# Hidden layer with 256 neurons
hidden_units = 256
W1 = tf.Variable(tf.random.normal([input_units, hidden_units], stddev=0.1), name="W1")
b1 = tf.Variable(tf.zeros([hidden_units]), name="b1")

# Output layer with one neuron per class (one for each digit)
W2 = tf.Variable(tf.random.normal([hidden_units, num_classes], stddev=0.1), name="W2")
b2 = tf.Variable(tf.zeros([num_classes]), name="b2")

# ---------------------------
# 4. Build Feed-Forward Computation
# ---------------------------
# Hidden layer: affine transform of the inputs, then ReLU.
z1 = tf.add(tf.linalg.matmul(x_ph, W1), b1)
a1 = tf.nn.relu(z1)

# Output layer: affine transform of the hidden activations.  These are the
# raw, unnormalised class scores (logits) that the loss consumes.
logits = tf.add(tf.linalg.matmul(a1, W2), b2)

# Softmax turns the logits into per-class probabilities; this tensor is used
# for evaluation only, not for the loss.
predictions = tf.nn.softmax(logits)

# ---------------------------
# 5. Define Loss and Back-Propagation (Training)
# ---------------------------
# Cross-entropy is computed directly from the logits (the TF 2.x name of the
# op is tf.nn.softmax_cross_entropy_with_logits) and averaged over the batch.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
    labels=y_ph,
    logits=logits,
)
loss = tf.reduce_mean(per_example_loss)

# NOTE: despite its name, `optimizer` holds the training op returned by
# minimize(); running it in a session applies one Adam update step.
adam = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001)
optimizer = adam.minimize(loss)

# ---------------------------
# 6. Define Accuracy Metric
# ---------------------------
# A sample counts as correct when the most probable predicted class matches
# the position of the 1 in its one-hot label; accuracy is the mean of those
# per-sample hits cast to float32.
predicted_class = tf.argmax(predictions, axis=1)
true_class = tf.argmax(y_ph, axis=1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# ---------------------------
# 7. Training Parameters
# ---------------------------
epochs = 10        # full passes over the training set
batch_size = 100   # samples per gradient step

# Ceiling division: when batch_size does not divide the training-set size the
# final, shorter batch is still counted instead of being silently dropped
# (and a batch_size larger than the data set still yields one batch, not
# zero).  For sizes that divide evenly this equals plain floor division.
num_batches = (x_train.shape[0] + batch_size - 1) // batch_size

# ---------------------------
# 8. Train the Neural Network
# ---------------------------
# Runs `epochs` passes of mini-batch training, printing the loss and accuracy
# on the full training set after every epoch and the test accuracy at the end.
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())

    num_train = x_train.shape[0]

    for epoch in range(epochs):
        # Draw a fresh random visiting order each epoch.  Batches are gathered
        # through this index array, so x_train / y_train are never rebound:
        # that avoids copying the whole data set every epoch and leaves the
        # module-level arrays in their original order.
        order = np.random.permutation(num_train)

        for i in range(num_batches):
            # Slicing the index array tolerates a shorter final batch.
            batch_idx = order[i * batch_size:(i + 1) * batch_size]
            sess.run(
                optimizer,
                feed_dict={x_ph: x_train[batch_idx], y_ph: y_train[batch_idx]},
            )

        # Evaluate training performance after each epoch (whole training set
        # fed in a single run).
        train_loss, train_acc = sess.run(
            [loss, accuracy],
            feed_dict={x_ph: x_train, y_ph: y_train},
        )
        print("Epoch:", epoch+1, "Loss:", train_loss, "Accuracy:", train_acc)

    # Evaluate on the test set
    test_acc = sess.run(accuracy, feed_dict={x_ph: x_test, y_ph: y_test})
    print("Test Accuracy:", test_acc)



Epoch: 1 Loss: 0.14476748 Accuracy: 0.9599
Epoch: 2 Loss: 0.08885283 Accuracy: 0.97568333
Epoch: 3 Loss: 0.06153954 Accuracy: 0.98253334
Epoch: 4 Loss: 0.044526562 Accuracy: 0.9881167
Epoch: 5 Loss: 0.032819714 Accuracy: 0.99113333
Epoch: 6 Loss: 0.023686841 Accuracy: 0.9942667
Epoch: 7 Loss: 0.017396122 Accuracy: 0.9963667
Epoch: 8 Loss: 0.01565872 Accuracy: 0.99645
Epoch: 9 Loss: 0.012676493 Accuracy: 0.99701667
Epoch: 10 Loss: 0.008269353 Accuracy: 0.9986167
Test Accuracy: 0.981
