# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Reshape

# Fetch MNIST; each split comes back as an (images, labels) pair.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convert pixels to float32, divide by 255, and append a trailing channel
# axis so each split has shape (num_images, 28, 28, 1).
x_train = (x_train.astype('float32') / 255.0).reshape(len(x_train), 28, 28, 1)
x_test = (x_test.astype('float32') / 255.0).reshape(len(x_test), 28, 28, 1)

# Define the autoencoder architecture
def create_autoencoder(input_shape, latent_dim=128):
    """Build and compile a dense autoencoder with one hidden layer.

    The model flattens its input, maps it to a ``latent_dim``-wide ReLU
    layer, expands it back to the flattened input size through a sigmoid
    layer, and reshapes the result to ``input_shape``.

    Args:
        input_shape: Shape of a single sample without the batch axis,
            e.g. ``(28, 28, 1)``.
        latent_dim: Number of units in the hidden (bottleneck) layer.
            Defaults to 128.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer and binary
        cross-entropy loss.
    """
    # np.prod returns a NumPy integer scalar; hand Dense a plain Python int.
    flat_dim = int(np.prod(input_shape))

    input_img = Input(shape=input_shape)
    encoded = Flatten()(input_img)
    encoded = Dense(latent_dim, activation='relu')(encoded)
    decoded = Dense(flat_dim, activation='sigmoid')(encoded)
    decoded = Reshape(input_shape)(decoded)

    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    return autoencoder



# Create and train the first autoencoder to reconstruct the clean images.
autoencoder1 = create_autoencoder((28, 28, 1))
autoencoder1.fit(x_train, x_train, epochs=10, batch_size=128)

# Create and train the second autoencoder on the first one's reconstructions.
# The same reconstructions are used as both input and target, so run the
# forward pass over the training set once and reuse the result instead of
# calling predict() twice.
autoencoder2 = create_autoencoder((28, 28, 1))
reconstructed_train = autoencoder1.predict(x_train)
autoencoder2.fit(reconstructed_train, reconstructed_train, epochs=10, batch_size=128)

def generate_fgsm_adversarial(model, x, y, epsilon=0.01):
    """Craft single-step sign-gradient (FGSM) adversarial examples.

    Args:
        model: Callable Keras model; it is invoked with ``training=False``.
        x: Input batch; converted to a float32 tensor.
        y: Labels passed as the target of sparse categorical cross-entropy.
        epsilon: Step size applied to the sign of the input gradient.

    Returns:
        NumPy array holding ``x + epsilon * sign(d loss / d x)`` clipped
        to the range [0, 1].
    """
    inputs = tf.constant(x, dtype=tf.float32)

    with tf.GradientTape() as tape:
        # A constant tensor is not tracked unless watched explicitly.
        tape.watch(inputs)
        predictions = model(inputs, training=False)
        per_sample_loss = keras.losses.sparse_categorical_crossentropy(y, predictions)

    step = epsilon * tf.sign(tape.gradient(per_sample_loss, inputs))
    perturbed = tf.clip_by_value(inputs + step, 0, 1)
    return perturbed.numpy()

def generate_pgd_adversarial(model, x, y, epsilon=0.01, alpha=0.01, num_iter=40):
    """Craft iterative sign-gradient (PGD) adversarial examples.

    Starting from ``x``, each iteration takes a step of size ``alpha`` along
    the sign of the loss gradient, then clips the result to within
    ``epsilon`` of ``x`` element-wise and finally to the range [0, 1].

    Args:
        model: Callable Keras model; it is invoked with ``training=False``.
        x: Input batch; converted to a float32 tensor.
        y: Labels passed as the target of sparse categorical cross-entropy.
        epsilon: Maximum per-element deviation from ``x``.
        alpha: Step size of each iteration.
        num_iter: Number of iterations.

    Returns:
        NumPy array of the perturbed inputs.
    """
    # The epsilon-ball bounds depend only on the clean input, so build them
    # once instead of re-allocating two full-size arrays every iteration.
    x_clean = tf.constant(x, dtype=tf.float32)
    lower_bound = x_clean - epsilon
    upper_bound = x_clean + epsilon

    x_adv = tf.Variable(x_clean)

    for _ in range(num_iter):
        # A trainable tf.Variable is watched by the tape automatically.
        with tf.GradientTape() as tape:
            predictions = model(x_adv, training=False)
            loss = keras.losses.sparse_categorical_crossentropy(y, predictions)

        signed_grad = tf.sign(tape.gradient(loss, x_adv))
        x_adv.assign_add(alpha * signed_grad)
        # Clip to the epsilon ball around the clean input, then to [0, 1].
        x_adv.assign(tf.clip_by_value(x_adv, lower_bound, upper_bound))
        x_adv.assign(tf.clip_by_value(x_adv, 0, 1))

    return x_adv.numpy()

  from tensorflow.keras.layers import Flatten, Dense, Dropout
classifier = keras.Sequential([
    Flatten(input_shape=(28, 28, 1)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])
# Compile and train the classifier on the MNIST dataset
classifier.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
classifier.fit(x_train, y_train, epochs=5, batch_size=128)


# --- FGSM: attack the test set, then denoise with one / both autoencoders ---
epsilon = 0.1  # perturbation size; tune as needed
adv_images_fgsm = generate_fgsm_adversarial(
    classifier, x_test, y_test, epsilon=epsilon
)
denoised_images1_fgsm = autoencoder1.predict(adv_images_fgsm)
denoised_images2_fgsm = autoencoder2.predict(denoised_images1_fgsm)

# --- PGD: attack the test set, then denoise with one / both autoencoders ---
# Perturbation bound, per-iteration step size, and iteration count; tune as needed.
epsilon, alpha, num_iter = 0.1, 0.01, 40
adv_images_pgd = generate_pgd_adversarial(
    classifier, x_test, y_test, epsilon=epsilon, alpha=alpha, num_iter=num_iter
)
denoised_images1_pgd = autoencoder1.predict(adv_images_pgd)
denoised_images2_pgd = autoencoder2.predict(denoised_images1_pgd)

# --- CW black-box: settings only; the attack itself is currently disabled ---
epsilon, num_iter, confidence = 0.1, 40, 0.0

# NOTE(review): generate_cw_blackbox_adversarial is not defined in the visible
# part of this file, so the calls below stay commented out.
# adv_images_cw = generate_cw_blackbox_adversarial(classifier, x_test, y_test, epsilon, num_iter, confidence)
# denoised_images1_cw = autoencoder1.predict(adv_images_cw)
# denoised_images2_cw = autoencoder2.predict(denoised_images1_cw)

# Evaluate the classifier on the original adversarial images.
# evaluate() returns [loss, accuracy] because the classifier is compiled with
# metrics=['accuracy'], so index 1 is the accuracy.
original_adv_acc_fgsm = classifier.evaluate(adv_images_fgsm, y_test, verbose=0)[1]
original_adv_acc_pgd = classifier.evaluate(adv_images_pgd, y_test, verbose=0)[1]
#original_adv_acc_cw = classifier.evaluate(adv_images_cw, y_test, verbose=0)[1]
print('Accuracy on original adversarial image (FGSM):', original_adv_acc_fgsm)
print('Accuracy on original adversarial image (PGD):', original_adv_acc_pgd)
#print('Accuracy on original adversarial image (CW):', original_adv_acc_cw)

# Evaluate the classifier on images denoised by the first autoencoder only.
# The denoised_images1_* arrays are the output of autoencoder1 alone, so this
# section is labelled "one autoencoder".
print('Accuracy on denoised image after using one autoencoder: ')
denoised_acc_fgsm1 = classifier.evaluate(denoised_images1_fgsm, y_test, verbose=0)[1]
denoised_acc_pgd1 = classifier.evaluate(denoised_images1_pgd, y_test, verbose=0)[1]
#denoised_acc_cw1 = classifier.evaluate(denoised_images1_cw, y_test, verbose=0)[1]
print('Accuracy on denoised image (FGSM):', denoised_acc_fgsm1)
print('Accuracy on denoised image (PGD):', denoised_acc_pgd1)
#print('Accuracy on denoised image (CW):', denoised_acc_cw1)


# Evaluate the classifier on images passed through both autoencoders in
# sequence (the denoised_images2_* arrays are autoencoder2 applied to the
# output of autoencoder1).
print('Accuracy on denoised image after using two autoencoders: ')
denoised_acc_fgsm = classifier.evaluate(denoised_images2_fgsm, y_test, verbose=0)[1]
denoised_acc_pgd = classifier.evaluate(denoised_images2_pgd, y_test, verbose=0)[1]
#denoised_acc_cw = classifier.evaluate(denoised_images2_cw, y_test, verbose=0)[1]
print('Accuracy on denoised image (FGSM):', denoised_acc_fgsm)
print('Accuracy on denoised image (PGD):', denoised_acc_pgd)
#print('Accuracy on denoised image (CW):', denoised_acc_cw)