In [1]:
import tensorflow as tf
import plotly.express as px
import matplotlib.pyplot as plt
from utils import *
import pandas as pd

In [2]:
directory = 'images/final/'
width = 64
height = 64
channels = 4
image_size = (width, height)
input_shape = (width, height, channels)
batch_size = 32

In [3]:
train_ds, class_names = load_data(directory, batch_size, image_size, GAN=True)

Found 13849 files belonging to 905 classes.


In [4]:
types = pd.read_csv('PokeDataset.csv', usecols=['Name', 'Type1', 'Type2'])

In [5]:
types.sample(5)

Unnamed: 0,Name,Type1,Type2
247,Tyranitar,Rock,Dark
114,Kangaskhan,Normal,
769,Palossand,Ghost,Ground
240,Miltank,Normal,
754,Morelull,Grass,Fairy


In [6]:
# make a dictionary of types and their index in the list of types
types_dict = {}
for i, t in enumerate(types['Type1'].unique()):
    types_dict[t] = i

# create vector of labels with 0 for each type except the one in the row (type1 and type2 are strings)
def create_label(row):
    label = np.zeros(18)
    label[types_dict[row['Type1']]] = 1
    if row['Type2'] in types_dict:
        label[types_dict[row['Type2']]] = 1
    return label


types['Label'] = types.apply(create_label, axis=1)

In [48]:
def type_to_vector(type):
    label = np.zeros(18)
    label[types_dict[type]] = 1
    return label.reshape(1, 18)

In [7]:
# map labels in train_ds to the column Label in types
def map_labels(x, y):
    y_onehot = tf.one_hot(tf.argmax(y, axis=1), depth=18)
    y_label = y_onehot.numpy()
    for i in range(len(y_label)):
        y_label[i] = types.iloc[i]['Label']
    # add a 1 before the label to indicate that it is a real image
    y_label = np.insert(y_label, 0, 1, axis=1)
    y_label_tensor = tf.convert_to_tensor(y_label, dtype=tf.float32)
    return x, y_label_tensor

train_ds = train_ds.map(lambda x, y: tf.py_function(map_labels, [x, y], [tf.float32, tf.float32]))


In [9]:
train_ds = prepare(train_ds, shuffle=True, augment=False, GAN=True)

channels = 3
input_shape = (width, height, channels)

In [10]:
plot_n_images(train_ds, 5, class_names, GAN=True, size=width)

(64, 64, 3)


(64, 64, 3)


(64, 64, 3)


(64, 64, 3)


(64, 64, 3)


In [14]:
# define the discriminator model
def define_cdiscriminator(input_shape, num_classes):
    img_input = tf.keras.layers.Input(shape=input_shape)
    label_input = tf.keras.layers.Input(shape=(num_classes,))
    
    # image branch
    layer = tf.keras.layers.Conv2D(64, (3, 3), padding='same')(img_input)
    layer = tf.keras.layers.LeakyReLU(alpha=0.2)(layer)
    layer = tf.keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same')(layer)
    layer = tf.keras.layers.LeakyReLU(alpha=0.2)(layer)
    layer = tf.keras.layers.Conv2D(256, (3, 3), strides=(2, 2), padding='same')(layer)
    layer = tf.keras.layers.LeakyReLU(alpha=0.2)(layer)
    layer = tf.keras.layers.Conv2D(256, (3, 3), strides=(2, 2), padding='same')(layer)
    layer = tf.keras.layers.LeakyReLU(alpha=0.2)(layer)
    layer = tf.keras.layers.Flatten()(layer)

    # label branch
    label_layer = tf.keras.layers.Dense(64)(label_input)
    label_layer = tf.keras.layers.LeakyReLU(alpha=0.2)(label_layer)
    label_layer = tf.keras.layers.Dense(128)(label_layer)
    label_layer = tf.keras.layers.LeakyReLU(alpha=0.2)(label_layer)
    label_layer = tf.keras.layers.Dense(256)(label_layer)
    label_layer = tf.keras.layers.LeakyReLU(alpha=0.2)(label_layer)
    label_layer = tf.keras.layers.Dense(256)(label_layer)
    label_layer = tf.keras.layers.LeakyReLU(alpha=0.2)(label_layer)
    label_layer = tf.keras.layers.Flatten()(label_layer)

    # concatenate image and label branches
    layer = tf.keras.layers.concatenate([layer, label_layer])
    layer = tf.keras.layers.Dropout(0.4)(layer)
    layer = tf.keras.layers.Dense(1, activation='sigmoid')(layer)
    
    # define the model
    model = tf.keras.models.Model(inputs=[img_input, label_input], outputs=layer)

    # compile the model
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    
    return model

    
# define the standalone generator model
def define_cgenerator(latent_dim, num_classes):
    # Define the noise vector input
    in_lat = tf.keras.layers.Input(shape=(latent_dim,))
    # Define the class vector input
    in_class = tf.keras.layers.Input(shape=(num_classes,))
    # Concatenate the noise and class vectors
    merge = tf.keras.layers.Concatenate()([in_lat, in_class])
    # Project the concatenated vector to a larger dimension
    n_nodes = 256 * 8 * 8
    dense = tf.keras.layers.Dense(n_nodes)(merge)
    # Reshape the dense layer output
    reshaped = tf.keras.layers.Reshape((8, 8, 256))(dense)
    # Upsample the feature maps
    upsampled = tf.keras.layers.Conv2DTranspose(256, (4,4), strides=(2,2), padding='same')(reshaped)
    upsampled = tf.keras.layers.LeakyReLU(alpha=0.2)(upsampled)
    upsampled = tf.keras.layers.Conv2DTranspose(256, (4,4), strides=(2,2), padding='same')(upsampled)
    upsampled = tf.keras.layers.LeakyReLU(alpha=0.2)(upsampled)
    upsampled = tf.keras.layers.Conv2DTranspose(128, (4,4), strides=(2,2), padding='same')(upsampled)
    upsampled = tf.keras.layers.LeakyReLU(alpha=0.2)(upsampled)
    # Output layer
    out_layer = tf.keras.layers.Conv2D(3, (3,3), activation='tanh', padding='same')(upsampled)
    # Define the model
    model = tf.keras.models.Model([in_lat, in_class], out_layer)
    return model

In [15]:
def define_cgan(g_model, d_model):
    d_model.trainable = False
    gen_noise, gen_label = g_model.input
    gen_output = g_model.output
    gan_output = d_model([gen_output, gen_label])
    model = tf.keras.Model([gen_noise, gen_label], gan_output)
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='categorical_crossentropy', optimizer=opt)
    return model


In [19]:
# build and compile the discriminator
cdiscriminator = define_cdiscriminator((64, 64, 3), 18)
cdiscriminator.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 64, 64, 3)]  0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 18)]         0           []                               
                                                                                                  
 conv2d (Conv2D)                (None, 64, 64, 64)   1792        ['input_1[0][0]']                
                                                                                                  
 dense (Dense)                  (None, 64)           1216        ['input_2[0][0]']                
                                                                                              

In [20]:
# build the generator
cgenerator = define_cgenerator(100, 18)
cgenerator.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 100)]        0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 18)]         0           []                               
                                                                                                  
 concatenate_1 (Concatenate)    (None, 118)          0           ['input_3[0][0]',                
                                                                  'input_4[0][0]']                
                                                                                                  
 dense_5 (Dense)                (None, 16384)        1949696     ['concatenate_1[0][0]']    

In [21]:
# build the gan
cgan_model = define_cgan(cgenerator, cdiscriminator)
cgan_model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 100)]        0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 18)]         0           []                               
                                                                                                  
 concatenate_1 (Concatenate)    (None, 118)          0           ['input_3[0][0]',                
                                                                  'input_4[0][0]']                
                                                                                                  
 dense_5 (Dense)                (None, 16384)        1949696     ['concatenate_1[0][0]']    

In [35]:
def generate_fake_samples(g_model, latent_dim, n_samples, n_classes):
    # generate points in latent space
    noise_input = generate_latent_points(latent_dim, n_samples)
    # generate random class labels
    y_types = generate_class_labels(n_samples, n_classes)
    # predict outputs
    X = g_model.predict([noise_input, y_types], verbose=0)
    # # create 'fake' class labels (0)
    y_class = tf.zeros((n_samples, 1))
    return X, y_types, y_class

def generate_latent_points(latent_dim, n_samples):
    # generate points in the latent space
    x_input = tf.random.normal(shape=(n_samples, latent_dim))
    return x_input

def generate_class_labels(n_samples, n_classes):
    # generate random one-hot class labels
    class_labels = tf.random.uniform((n_samples, n_classes), minval=0, maxval=1, dtype=tf.int32)
    return tf.one_hot(tf.argmax(class_labels, axis=1), depth=n_classes)

def generate_real_samples(dataset, n_samples):
    X, y_types, y_class = [], [], []
    while len(X) < n_samples:
        dataset.shuffle(100, seed=123)
        for x, label in dataset.take(5):
            for i in range(len(x)):
                X.append(x[i])
                y_types.append(label[i][1:])
                y_class.append(label[i][0])
                if len(X) == n_samples:
                    break
            if len(X) == n_samples:
                break
    X = tf.stack(X)
    y_types = tf.stack(y_types)
    y_class = tf.reshape(tf.stack(y_class), (n_samples, 1))
    return X, y_types, y_class

In [42]:
def summarize_performance(epoch, g_model, d_model, dataset, latent_dim, n_samples=100):
    # prepare real samples
    X_real, y_real_types, y_real_class = generate_real_samples(dataset, n_samples)
    print(X_real.shape, y_real_types.shape, y_real_class.shape)
    # evaluate discriminator on real examples
    _, acc_real = d_model.evaluate([X_real, y_real_types], y_real_class, verbose=0)
    # prepare fake examples
    x_fake, y_fake_types, y_fake_class = generate_fake_samples(g_model, latent_dim, n_samples, 18)
    # evaluate discriminator on fake examples
    _, acc_fake = d_model.evaluate([x_fake, y_fake_types], y_fake_class, verbose=0)
    # summarize discriminator performance
    print('>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real*100, acc_fake*100))
    # save plot
    save_plot(x_fake, epoch)
    # save the generator model tile file
    filename = 'cgenerator_model_64_e%03d.h5' % (epoch+1)
    g_model.save(filename)

def save_plot(examples, epoch, n=10):
    # plot images
    for i in range(n * n):
        # define subplot
        plt.subplot(n, n, 1 + i)
        # turn off axis
        plt.axis('off')
        # plot raw pixel data
        print(examples[i])
        plt.imshow(examples[i, :, :, :])
    # save plot to file
    filename = 'cgenerated_plot_64_e%03d.png' % (epoch+1)
    plt.savefig(filename)
    plt.close()

In [40]:
def train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=10, n_batch=64):
    bat_per_epo = int(dataset.cardinality())
    # manually enumerate epochs
    for i in range(n_epochs):
        # enumerate batches over the training set
        for j, batch in enumerate(dataset):
            # get randomly selected 'real' samples
            X_real, y_real_class, y_real_types = batch[0], batch[1][:, 0], batch[1][:, 1:]
            half_batch = int(X_real.shape[0])
            # generate 'fake' examples
            # y_gan = tf.one_hot(tf.random.uniform((half_batch, 1), minval=0, maxval=10, dtype=tf.int32), depth=10)
            X_fake, y_fake_types, y_fake_class = generate_fake_samples(g_model, latent_dim, half_batch, 18)
            # create training set for the discriminator
            y_real_class = tf.reshape(y_real_class, (half_batch, 1))
            # y_fake = tf.zeros((half_batch, 1))
            # print(y_real.shape, y_fake.shape, X_real.shape, X_fake.shape)
            X, y_types = tf.concat([X_real, X_fake], axis=0), tf.concat([y_real_types, y_fake_types], axis=0)
            y_class = tf.concat([y_real_class, y_fake_class], axis=0)
            # update discriminator model weights
            # print(X.shape, y_types.shape, y_class.shape)
            d_loss, _ = d_model.train_on_batch([X, y_types], y_class)
            # prepare points in latent space as input for the generator
            X_gan, y_gan_types = generate_latent_points(latent_dim, n_batch), tf.squeeze(tf.one_hot(tf.random.uniform((n_batch, 1), minval=0, maxval=18, dtype=tf.int32), depth=18))
            # update the generator via the discriminator's error
            y_gan_class = tf.zeros((n_batch, 1))
            # print(X_gan.shape, y_gan_types.shape, y_gan_class.shape)
            g_loss = gan_model.train_on_batch([X_gan, y_gan_types], y_gan_class)
            # summarize loss on this batch
            print('>%d, %d/%d, d=%.3f, g=%.3f' % (i+1, j+1, bat_per_epo, d_loss, g_loss))
        # evaluate the model performance, sometimes
        if (i) % 10 == 0:
            summarize_performance(i, g_model, d_model, dataset, latent_dim)
    # save the generator model
    g_model.save('cgenerator_model_64_final.h5')


In [41]:
# size of the latent space
latent_dim = 100
# train model
train(cgenerator, cdiscriminator, cgan_model, train_ds, latent_dim, n_epochs=1)


>1, 1/433, d=0.000, g=0.000
>1, 2/433, d=0.000, g=0.000
>1, 3/433, d=0.000, g=0.000
>1, 4/433, d=0.000, g=0.000
>1, 5/433, d=0.000, g=0.000
>1, 6/433, d=0.000, g=0.000
>1, 7/433, d=0.000, g=0.000
>1, 8/433, d=0.000, g=0.000
>1, 9/433, d=0.000, g=0.000
>1, 10/433, d=0.000, g=0.000
>1, 11/433, d=0.000, g=0.000
>1, 12/433, d=0.000, g=0.000
>1, 13/433, d=0.000, g=0.000
>1, 14/433, d=0.000, g=0.000
>1, 15/433, d=0.000, g=0.000
>1, 16/433, d=0.000, g=0.000
>1, 17/433, d=0.000, g=0.000
>1, 18/433, d=0.000, g=0.000
>1, 19/433, d=0.000, g=0.000
>1, 20/433, d=0.000, g=0.000
>1, 21/433, d=0.000, g=0.000
>1, 22/433, d=0.000, g=0.000
>1, 23/433, d=0.000, g=0.000
>1, 24/433, d=0.000, g=0.000
>1, 25/433, d=0.000, g=0.000
>1, 26/433, d=0.000, g=0.000
>1, 27/433, d=0.000, g=0.000
>1, 28/433, d=0.000, g=0.000
>1, 29/433, d=0.000, g=0.000
>1, 30/433, d=0.000, g=0.000
>1, 31/433, d=0.000, g=0.000
>1, 32/433, d=0.000, g=0.000
>1, 33/433, d=0.000, g=0.000
>1, 34/433, d=0.000, g=0.000
>1, 35/433, d=0.000, g=

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


>Accuracy real: 95%, fake: 100%


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping i



In [77]:
# save model to resume training
cgenerator.save('cgenerator_model_64.h5')
cdiscriminator.save('cdiscriminator_model_64.h5')
cgan_model.save('cgan_model_64.h5')



test

In [17]:
# generator = tf.keras.models.load_model('cgenerator_model.h5')



In [57]:
# while True:
#     X = cgenerator.predict(generate_latent_points(100, 1), verbose=0)
#     fake_or_true = cdiscriminator.predict(X, verbose=0)[0][0]
#     if fake_or_true > 0.5:
#         break
# plot_image(X[0], "image générée", {"real": fake_or_true})

In [53]:
X = cgenerator.predict([generate_latent_points(100, 1), type_to_vector('Grass')], verbose=0)
plot_image(X[0], "image générée", {"real": cdiscriminator.predict([X, type_to_vector('Grass')], verbose=0)[0][0]}, 64)

In [75]:
# test image with random noise between -1 and 1
test_image_noise = tf.random.uniform(shape=(1, 64, 64, 3), minval=-1, maxval=1)
# print(test_image_noise[0][100][50])

In [76]:
plot_image(test_image_noise[0], "image test", {"real": discriminator.predict(test_image_noise, verbose=0)[0][0]})