## Presentation :

The purpose of this notebook is to build a GAN that generates (and discriminates) images of e-commerce clothing. The dataset used is Fashion-MNIST, loaded from TensorFlow Datasets.

## Install dependencies that might not already be installed in the Kaggle environment

In [None]:
pip install matplotlib tensorflow-datasets

## Import libraries + hardware test

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from matplotlib import pyplot as plt
import numpy as np

# Enumerate the GPUs TensorFlow can see, print each one, and enable memory
# growth on it.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')

for device in gpus:
    print(device)
    tf.config.experimental.set_memory_growth(device, enable=True)

2024-04-29 11:48:10.843683: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 11:48:10.843749: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 11:48:10.845227: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
# Distribution strategy used later to build the models and to distribute the dataset.
strategy = tf.distribute.MirroredStrategy()

# Report how many replicas the strategy keeps in sync.
replica_count = strategy.num_replicas_in_sync
print(f'Number of devices: {replica_count}')

## Load the dataset from TensorFlow Datasets

In [None]:
# Fetch the training split of Fashion-MNIST through TensorFlow Datasets.
dataset = tfds.load(name='fashion_mnist', split='train')

## Visualize small sample of the data

In [None]:
# Iterate over the raw dataset as NumPy records; each record is a dict that
# exposes at least the 'image' and 'label' keys used below.
image_pipeline = dataset.as_numpy_iterator()

fig, ax = plt.subplots(ncols=4, figsize=(20,20))

# Show four samples side by side, titled with their class label.
# The builtin next() is used (rather than a .next() method) so this works with
# any Python iterator, and no sample is pulled and thrown away beforehand.
for axis in ax:
    sample = next(image_pipeline)
    axis.imshow(np.squeeze(sample['image']))
    axis.set_title(str(sample['label']))

## Preprocessing

In [None]:
def scale_images(data):
    """Extract the image from a dataset record and rescale it to [-1, 1].

    The generator's output layer uses a ``tanh`` activation, so generated
    images live in [-1, 1]. Real images are mapped onto the same range so the
    discriminator cannot separate real from fake on value range alone.

    Args:
        data: A dataset record (dict) holding the image under the ``'image'``
            key. Pixel values are assumed to be 8-bit intensities in
            [0, 255], the same assumption the previous ``/ 255`` scaling made.

    Returns:
        The image as a ``float32`` tensor with values in [-1, 1].
    """
    # Cast explicitly: arithmetic between an integer tensor and a float
    # constant is not guaranteed to promote implicitly.
    image = tf.cast(data['image'], tf.float32)
    return (image - 127.5) / 127.5

# Reload the raw split so this cell is idempotent when re-run.
dataset = tfds.load('fashion_mnist', split='train')
# Run the scaling in parallel; AUTOTUNE lets tf.data choose the parallelism.
dataset = dataset.map(scale_images, num_parallel_calls=tf.data.AUTOTUNE)
# Cache the scaled images so the map is only computed during the first epoch.
dataset = dataset.cache()
dataset = dataset.shuffle(10000)  # Adjusted buffer size for memory management
dataset = dataset.batch(256)  # Adjust based on your GPU
dataset = dataset.prefetch(tf.data.AUTOTUNE)  # Auto-tune the prefetch size
# Split each batch across the replicas managed by the strategy.
dataset = strategy.experimental_distribute_dataset(dataset)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Reshape, LeakyReLU, Dropout, UpSampling2D, BatchNormalization

## Build generator model

In [None]:
def build_generator(latent_dim=128):
    """Build the generator: latent vector -> 28x28x1 image in [-1, 1].

    Args:
        latent_dim: Size of the latent (noise) vector fed to the generator.
            Defaults to 128, the value previously hard-coded.

    Returns:
        An uncompiled Keras ``Sequential`` model whose final layer is a
        single-filter convolution with a ``tanh`` activation.
    """
    model = Sequential()

    # Input layer: project the latent vector and reshape it to a 7x7 map with 128 channels.
    model.add(Dense(7*7*128, input_dim=latent_dim))
    model.add(LeakyReLU(0.2))
    model.add(Reshape((7,7,128)))

    # Two upsampling blocks; UpSampling2D's default factor doubles the
    # spatial size each time (7 -> 14 -> 28).
    for _ in range(2):
        model.add(UpSampling2D())
        model.add(Conv2D(128, 5, padding='same'))
        model.add(BatchNormalization())
        model.add(LeakyReLU(0.2))

    # Two convolutional blocks that keep the 28x28 resolution.
    for _ in range(2):
        model.add(Conv2D(128, 4, padding='same'))
        model.add(BatchNormalization())
        model.add(LeakyReLU(0.2))

    # Output conv layer: collapse to a single channel squashed into [-1, 1] by tanh.
    model.add(Conv2D(1, 4, padding='same', activation='tanh'))

    return model

# Build a generator instance and print its layer-by-layer summary.
generator = build_generator()
generator.summary()

## Test untrained generator

In [None]:
# Sample four latent vectors. The generator takes inputs of shape (batch, 128),
# so the noise is drawn with exactly that shape instead of relying on Keras to
# squeeze a trailing singleton axis.
latent_vectors = np.random.randn(4, 128)
# Generate new fashion (a single forward pass is enough)
generated_images = generator.predict(latent_vectors)
# Setup the subplot formatting
fig, ax = plt.subplots(ncols=4, figsize=(20,20))
# Plot each generated image in its own subplot
for idx, image in enumerate(generated_images):
    ax[idx].imshow(np.squeeze(image))
    # Use the sample index as the plot title
    ax[idx].set_title(str(idx))

## Build discriminator

In [None]:
def build_discriminator():
    """Build the discriminator: 28x28x1 image -> single sigmoid score.

    The network is four stride-2 5x5 convolutions (32, 64, 128 and 256
    filters), each followed by a LeakyReLU, then a flatten and a one-unit
    sigmoid dense layer.

    Returns:
        An uncompiled Keras ``Sequential`` model.
    """
    # First conv block carries the input shape of the model.
    stack = [
        Conv2D(32, 5, strides=(2, 2), padding='same', input_shape=(28, 28, 1)),
        LeakyReLU(0.2),
    ]

    # Remaining conv blocks only differ by their filter count.
    for filters in (64, 128, 256):
        stack.append(Conv2D(filters, 5, strides=(2, 2), padding='same'))
        stack.append(LeakyReLU(0.2))

    # Classification head.
    stack.append(Flatten())
    stack.append(Dense(1, activation='sigmoid'))

    return Sequential(stack)

# Build a discriminator instance and print its layer-by-layer summary.
discriminator = build_discriminator()
discriminator.summary()

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import Mean

## Build GAN architecture

In [None]:
class FashionGAN(Model):
    """GAN wrapper that trains a generator and a discriminator with ``fit``.

    Label convention used by ``train_step``: the discriminator is trained to
    output 0 for real images and 1 for generated ones; the generator is
    trained to make the discriminator output 0 on its images.
    """

    def __init__(self, generator, discriminator, *args, latent_dim=128, **kwargs):
        """Store the two sub-models.

        Args:
            generator: Model mapping latent vectors to images.
            discriminator: Model mapping images to a real/fake score.
            *args: Forwarded to the base ``Model`` constructor.
            latent_dim: Size of the latent vectors sampled in ``train_step``.
                Defaults to 128, the value previously hard-coded.
            **kwargs: Forwarded to the base ``Model`` constructor.
        """
        # Pass through args and kwargs to base class
        super().__init__(*args, **kwargs)

        # Create attributes for gen and disc
        self.generator = generator
        self.discriminator = discriminator
        self.latent_dim = latent_dim

    def compile(self, g_opt, d_opt, g_loss, d_loss, *args, **kwargs):
        """Compile the base model and store the optimizers and losses.

        Args:
            g_opt: Optimizer applied to the generator's variables.
            d_opt: Optimizer applied to the discriminator's variables.
            g_loss: Loss callable ``(y_true, y_pred)`` for the generator.
            d_loss: Loss callable ``(y_true, y_pred)`` for the discriminator.
            *args: Forwarded to the base ``compile``.
            **kwargs: Forwarded to the base ``compile``.
        """
        # Compile with base class
        super().compile(*args, **kwargs)

        # Create attributes for losses and optimizers
        self.g_opt = g_opt
        self.d_opt = d_opt
        self.g_loss = g_loss
        self.d_loss = d_loss

    def train_step(self, batch):
        """Run one discriminator update followed by one generator update.

        Args:
            batch: A batch of real images (the dataset yields images only).

        Returns:
            Dict with the discriminator loss under ``"d_loss"`` and the
            generator loss under ``"g_loss"``.
        """
        # Get the data
        real_images = batch

        # Generate as many fakes as there are real images in this batch, so the
        # discriminator sees balanced classes whatever the per-replica batch
        # size is (including a smaller final batch).
        batch_size = tf.shape(real_images)[0]
        fake_images = self.generator(
            tf.random.normal((batch_size, self.latent_dim)), training=False
        )

        # Train the discriminator
        with tf.GradientTape() as d_tape:
            # Pass the real and fake images to the discriminator model
            yhat_real = self.discriminator(real_images, training=True)
            yhat_fake = self.discriminator(fake_images, training=True)
            yhat_realfake = tf.concat([yhat_real, yhat_fake], axis=0)

            # Create labels for real (0) and fake (1) images
            y_realfake = tf.concat([tf.zeros_like(yhat_real), tf.ones_like(yhat_fake)], axis=0)

            # Add noise to the target labels: real targets move into
            # [0, 0.15], fake targets into [0.85, 1].
            noise_real = 0.15*tf.random.uniform(tf.shape(yhat_real))
            noise_fake = -0.15*tf.random.uniform(tf.shape(yhat_fake))
            y_realfake += tf.concat([noise_real, noise_fake], axis=0)

            # Calculate the discriminator loss
            total_d_loss = self.d_loss(y_realfake, yhat_realfake)

        # Apply backpropagation to the discriminator only
        dgrad = d_tape.gradient(total_d_loss, self.discriminator.trainable_variables)
        self.d_opt.apply_gradients(zip(dgrad, self.discriminator.trainable_variables))

        # Train the generator
        with tf.GradientTape() as g_tape:
            # Generate some new images
            gen_images = self.generator(
                tf.random.normal((batch_size, self.latent_dim)), training=True
            )

            # Create the predicted labels
            predicted_labels = self.discriminator(gen_images, training=False)

            # Target 0 ("real") so the generator is rewarded for fooling the discriminator
            total_g_loss = self.g_loss(tf.zeros_like(predicted_labels), predicted_labels)

        # Apply backprop to the generator only
        ggrad = g_tape.gradient(total_g_loss, self.generator.trainable_variables)
        self.g_opt.apply_gradients(zip(ggrad, self.generator.trainable_variables))

        return {"d_loss":total_d_loss, "g_loss":total_g_loss}

## Instantiation of the GAN + scope the training loop to use all GPUs

In [None]:
with strategy.scope():
    # Build both networks inside the strategy scope.
    generator, discriminator = build_generator(), build_discriminator()

    # Wrap them in the subclassed GAN model.
    fashgan = FashionGAN(generator=generator, discriminator=discriminator)

    # One Adam optimizer per network; the discriminator uses a learning rate
    # ten times smaller than the generator's.
    g_opt = Adam(learning_rate=2e-6, beta_1=0.5)
    d_opt = Adam(learning_rate=2e-7, beta_1=0.5)

    # Both networks are trained with binary cross-entropy.
    g_loss = BinaryCrossentropy()
    d_loss = BinaryCrossentropy()

    # Hand optimizers and losses to the GAN.
    fashgan.compile(g_opt=g_opt, d_opt=d_opt, g_loss=g_loss, d_loss=d_loss)

## Train the GAN

In [None]:
# Train the GAN; the returned History object holds the per-epoch losses.
hist = fashgan.fit(x=dataset, epochs=5000)

## Plot the loss during training

In [None]:
# Plot both loss curves recorded during training on a single set of axes.
fig, ax = plt.subplots()
fig.suptitle('Loss')
for loss_name in ('d_loss', 'g_loss'):
    ax.plot(hist.history[loss_name], label=loss_name)
ax.legend()
plt.show()

## Generate images with the trained generator

In [None]:
# Sample 16 latent vectors and run them through the trained generator.
noise = tf.random.normal((16, 128))
imgs = generator(noise, training=False)
print("Generated images shape:", imgs.shape)

# Only plot when the generator produced the expected 16 single-channel 28x28 images.
shape_is_expected = imgs.shape == (16, 28, 28, 1)
if not shape_is_expected:
    print("Unexpected shape of generated images:", imgs.shape)
else:
    fig, ax = plt.subplots(ncols=4, nrows=4, figsize=(10, 10))
    ax = ax.flatten()
    # One image per cell of the 4x4 grid, axes hidden.
    for i, axis in enumerate(ax):
        axis.imshow(imgs[i, :, :, 0])
        axis.axis('off')
    plt.show()

In [None]:
# Persist both trained networks to HDF5 files in the working directory.
for network, target_file in ((generator, 'generator.h5'), (discriminator, 'discriminator.h5')):
    network.save(target_file)

## 2nd attempt: leveraging a pre-trained model instead of training from scratch

In [None]:
import tensorflow_hub as hub
from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, Conv2DTranspose, Conv2D, BatchNormalization, LeakyReLU, UpSampling2D, Reshape
from tensorflow.keras.applications import ResNet50

def build_generator():
    """Build the second generator: 100-d latent vector -> 28x28x1 image.

    A bias-free dense projection is reshaped to 7x7x256, then two
    (Conv2DTranspose -> BatchNormalization -> LeakyReLU -> UpSampling2D)
    stages with 128 and 64 filters bring it to 28x28, and a final
    single-filter transposed convolution with ``tanh`` produces the image.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    # Latent projection and reshape to the initial 7x7 feature map.
    stack = [
        layers.Input(shape=(100,)),
        layers.Dense(7*7*256, use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Reshape((7, 7, 256)),
    ]

    # Two refinement + upsampling stages (7 -> 14 -> 28).
    for filters in (128, 64):
        stack.extend([
            layers.Conv2DTranspose(filters, (5, 5), strides=(1, 1), padding='same', use_bias=False),
            layers.BatchNormalization(),
            layers.LeakyReLU(),
            layers.UpSampling2D(),
        ])

    # Output layer: one channel, squashed to [-1, 1].
    stack.append(
        layers.Conv2DTranspose(1, (5, 5), strides=(1, 1), padding='same', use_bias=False, activation='tanh')
    )

    return tf.keras.Sequential(stack)

def build_discriminator():
    """Build the second discriminator on top of a frozen ResNet50.

    The 28x28x1 input is upsampled 8x to 224x224, mapped to 3 channels by a
    3x3 ReLU convolution, passed through ResNet50 (no top, average pooling,
    ``trainable = False``) and classified by a one-unit sigmoid dense layer.

    Returns:
        An uncompiled functional Keras ``Model``.
    """
    # Input and adaptation to the shape ResNet50 was configured for.
    image_in = Input(shape=(28, 28, 1))
    upsampled = UpSampling2D(size=(8, 8))(image_in)  # 28 * 8 = 224
    three_channel = Conv2D(3, (3, 3), padding='same', activation='relu')(upsampled)

    # Frozen feature extractor.
    backbone = ResNet50(include_top=False, input_shape=(224, 224, 3), pooling='avg')
    backbone.trainable = False
    features = backbone(three_channel)

    # Binary classification head.
    flat_features = Flatten()(features)
    score = Dense(1, activation='sigmoid')(flat_features)

    return Model(inputs=image_in, outputs=score)

class FashionGAN(Model):
    """GAN wrapper whose ``train_step`` updates both networks from one forward pass.

    Label convention: the discriminator targets 0 for real images and 1 for
    generated ones; the generator targets 0 on its own images.
    """

    def __init__(self, generator, discriminator, *args, **kwargs):
        """Store the generator and discriminator; extra arguments go to ``Model``."""
        super().__init__(*args, **kwargs)
        self.generator = generator
        self.discriminator = discriminator

    def compile(self, g_opt, d_opt, g_loss, d_loss, *args, **kwargs):
        """Compile the base model and keep the per-network optimizers and losses."""
        super().compile(*args, **kwargs)
        self.g_opt, self.d_opt = g_opt, d_opt
        self.g_loss, self.d_loss = g_loss, d_loss

    def train_step(self, data):
        """Compute both losses under two tapes, then apply both updates.

        Args:
            data: An ``(images, labels)`` pair; the labels are ignored.

        Returns:
            Dict with the discriminator loss under ``"d_loss"`` and the
            generator loss under ``"g_loss"``.
        """
        # Only the images are needed.
        real_images, _ = data

        # One 100-d latent vector per real image.
        sample_count = tf.shape(real_images)[0]
        latent = tf.random.normal(shape=(sample_count, 100))

        with tf.GradientTape() as d_tape, tf.GradientTape() as g_tape:
            fake_images = self.generator(latent, training=True)
            real_output = self.discriminator(real_images, training=True)
            fake_output = self.discriminator(fake_images, training=True)

            # Discriminator: real -> 0, fake -> 1.
            total_d_loss = (
                self.d_loss(tf.zeros_like(real_output), real_output)
                + self.d_loss(tf.ones_like(fake_output), fake_output)
            )

            # Generator: wants its images scored as real (0).
            total_g_loss = self.g_loss(tf.zeros_like(fake_output), fake_output)

        # Both gradients are taken before either optimizer modifies any variable.
        d_vars = self.discriminator.trainable_variables
        g_vars = self.generator.trainable_variables
        d_grads = d_tape.gradient(total_d_loss, d_vars)
        g_grads = g_tape.gradient(total_g_loss, g_vars)

        self.d_opt.apply_gradients(zip(d_grads, d_vars))
        self.g_opt.apply_gradients(zip(g_grads, g_vars))

        # Metric names mapped to their current value tensors.
        return dict(d_loss=total_d_loss, g_loss=total_g_loss)

# Create a fresh distribution strategy and build everything inside its scope.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # Networks and the GAN wrapper around them.
    generator, discriminator = build_generator(), build_discriminator()
    fashgan = FashionGAN(generator=generator, discriminator=discriminator)

    # Same Adam settings and the same loss type for both networks.
    g_opt = Adam(learning_rate=2e-4, beta_1=0.5)
    d_opt = Adam(learning_rate=2e-4, beta_1=0.5)
    g_loss = BinaryCrossentropy()
    d_loss = BinaryCrossentropy()

    fashgan.compile(g_opt=g_opt, d_opt=d_opt, g_loss=g_loss, d_loss=d_loss)

## Preprocessing

In [None]:
# Load Fashion-MNIST dataset
# Load the Fashion-MNIST training split bundled with Keras (the test split is discarded).
(train_images, train_labels), _ = tf.keras.datasets.fashion_mnist.load_data()

# Normalize the images to [-1, 1] so real images share the range of the
# generator's tanh output (assumes 8-bit pixel values in [0, 255]).
train_images = (train_images.astype('float32') - 127.5) / 127.5

# Expand the dimensions to (28, 28, 1)
train_images = train_images[..., tf.newaxis]

# Create a TensorFlow dataset of (image, label) pairs; train_step ignores the labels.
dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))

# Shuffle and batch the dataset
dataset = dataset.shuffle(buffer_size=1024).batch(32)  # Adjust batch size as needed

# Use 'prefetch' to improve performance
dataset = dataset.prefetch(tf.data.AUTOTUNE)

## Training 

In [None]:
# Train the second GAN for 12 epochs and keep the training history.
hist = fashgan.fit(x=dataset, epochs=12)

## Generate image with the trained generator

In [None]:
# Sample 16 latent vectors of size 100 and run them through the trained generator.
noise = tf.random.normal((16, 100))
imgs = generator(noise, training=False)
print("Generated images shape:", imgs.shape)

# Plot a 4x4 grid only if the output has the expected shape.
shape_is_expected = imgs.shape == (16, 28, 28, 1)
if not shape_is_expected:
    print("Unexpected shape of generated images:", imgs.shape)
else:
    fig, ax = plt.subplots(ncols=4, nrows=4, figsize=(10, 10))
    ax = ax.flatten()
    for i, axis in enumerate(ax):
        axis.imshow(imgs[i, :, :, 0])
        axis.axis('off')
    plt.show()

In [None]:
# Persist the second pair of networks to HDF5 files in the working directory.
for network, target_file in ((generator, 'generator3.h5'), (discriminator, 'discriminator3.h5')):
    network.save(target_file)

In [None]:
# Additionally store the weights alone, one file per network.
weight_files = (
    (generator, 'generator_weights.weights.h5'),
    (discriminator, 'discriminator_weights.weights.h5'),
)
for network, target_file in weight_files:
    network.save_weights(target_file)

In [None]:
import os

# Print the full path of every file found under /kaggle/input, recursively.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _subfolders, entries in os.walk('/kaggle/input')
    for entry in entries
)
for file_path in input_file_paths:
    print(file_path)

In [None]:
# Shell escape: convert a notebook to PDF with nbconvert and write it to /kaggle/working/output.pdf.
# NOTE(review): the input path points inside an input dataset named
# "hello-world-turn-this-into-a-pdf" — confirm this is the notebook meant to be exported.
!jupyter nbconvert --to pdf /kaggle/input/hello-world-turn-this-into-a-pdf/__notebook__.ipynb --output /kaggle/working/output.pdf

## Conclusion

Hardware, computing power and training time are major hurdles when training and deploying deep learning models.

### Areas of improvement

- Go back to the initial architecture without using a pre-trained model
- Set up a more powerful NN training lab
- Give the model more training time
- Experiment with different architectures for the generator and the discriminator
