In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Input, Dense, Conv2D, Flatten, Lambda, BatchNormalization, Concatenate, LeakyReLU, Dropout, Reshape, Conv2DTranspose, Activation
from keras.models import Model
from keras import backend as K
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
from utils import *


In [2]:
# Where the image folders live and how many images go into one batch.
directory = '../images/final/'
batch_size = 32

# Image geometry used when loading: 96x96 with four channels.
width = height = 96
channels = 4
image_size = (width, height)
input_shape = image_size + (channels,)

# Length of the latent vector used by the models further down.
latent_dim = 100


In [3]:
# Build the training dataset from the image folder. `load_data` is not
# defined in this notebook (presumably it comes from the `utils` star-import).
train_ds, class_names = load_data(
    directory,
    batch_size,
    image_size,
    GAN=True,
)


Found 13849 files belonging to 905 classes.


In [4]:
# Read the name/type columns, order the rows alphabetically by 'Name' and
# renumber the index 0..n-1 so that a row's position is also its label.
types = (
    pd.read_csv('../PokeDataset.csv', usecols=['Name', 'Type1', 'Type2'])
    .sort_values(by=['Name'])
    .reset_index(drop=True)
)


In [5]:
# Share of rows that have a second type. `count()` skips missing values, so
# the ratio is (rows with a Type2) / (rows with a Type1).
n_with_primary = types['Type1'].count()
n_with_secondary = types['Type2'].count()
print(
    f"Percentage of pokemon with double type: {n_with_secondary / n_with_primary * 100:.2f}%")


Percentage of pokemon with double type: 50.69%


In [6]:
# Peek at the first five rows (five is the default for `head`).
types.head()


Unnamed: 0,Name,Type1,Type2
0,Abomasnow,Grass,Ice
1,Abra,Psychic,
2,Absol,Dark,
3,Accelgor,Bug,
4,Aegislash,Steel,Ghost


In [7]:
# Map every primary type name to an integer slot, numbered in the order the
# names first appear in the 'Type1' column.
types_dict = {
    type_name: slot
    for slot, type_name in enumerate(types['Type1'].unique())
}

# Multi-hot encoding of a row's types (Type1 and Type2 are strings).
def create_label(row):
    """Return the 18-slot multi-hot type vector for one row of `types`.

    The slot of ``row['Type1']`` is always set to 1; a primary type that is
    not a key of `types_dict` raises ``KeyError``. The slot of
    ``row['Type2']`` is set only when that value is a key of `types_dict`;
    any other value (for instance a missing second type) is skipped.

    Args:
        row: mapping-like object with 'Type1' and 'Type2' entries.

    Returns:
        A 1-D NumPy array of length 18 containing zeros and ones.
    """
    primary_slot = types_dict[row['Type1']]
    secondary_slot = types_dict.get(row['Type2'])

    label = np.zeros(18)
    label[primary_slot] = 1
    if secondary_slot is not None:
        label[secondary_slot] = 1
    return label


# Add a 'Label' column: `apply` with axis=1 hands create_label one row at a
# time and the returned vectors are stored per row.
types['Label'] = types.apply(create_label, axis=1)


In [8]:
def types_to_vector(type1, type2=None):
    """Encode one or two type names as a (1, 18) multi-hot row vector.

    Args:
        type1: primary type name; must be a key of `types_dict`
            (``KeyError`` otherwise).
        type2: optional secondary type name. It is ignored when it is not a
            key of `types_dict`, which includes the default ``None``.

    Returns:
        NumPy array of shape (1, 18) with ones at the slots of the given types.
    """
    hot_slots = [types_dict[type1]]
    if type2 in types_dict:
        hot_slots.append(types_dict[type2])

    label = np.zeros((1, 18))
    label[0, hot_slots] = 1
    return label


In [9]:
# Show the first image of one batch together with its class: print the index
# of the largest entry of its label vector, then plot the image under the
# matching class name.
for x, y in train_ds.take(1):
    class_idx = int(tf.argmax(tf.reshape(y[0], [-1, 1]), axis=0))
    print(class_idx)
    plot_image(x[0], class_names[class_idx], size=width)


521


In [10]:
# Replace the class labels of a batch with the 'Label' vectors stored in `types`.
def map_labels(x, y):
    """Swap per-class labels for type vectors with a leading column of ones.

    The class index of every sample is the argmax of its label row. That
    index is used as a row label into `types` to fetch the 18-element 'Label'
    vector. A constant column of ones is then placed in front, giving 19
    values per sample.

    Uses ``.numpy()``, so it has to run eagerly (e.g. inside
    ``tf.py_function``).

    Args:
        x: batch of images, returned unchanged.
        y: batch of label rows; argmax is taken along axis 1.

    Returns:
        Tuple ``(x, labels)`` where ``labels`` is a float32 tensor of shape
        (batch, 19).
    """
    class_ids = tf.argmax(y, axis=1).numpy()
    n_samples = len(class_ids)

    type_rows = np.zeros((n_samples, 18))
    for row, class_id in enumerate(class_ids):
        type_rows[row] = types.loc[class_id, 'Label']

    # Column 0 is a constant 1 for every sample, followed by the 18 type slots.
    flagged = np.concatenate([np.ones((n_samples, 1)), type_rows], axis=1)
    return x, tf.convert_to_tensor(flagged, dtype=tf.float32)


In [11]:
def _attach_type_labels(x, y):
    """Run map_labels eagerly through tf.py_function; both outputs are float32."""
    return tf.py_function(map_labels, [x, y], [tf.float32, tf.float32])


# Preprocess the dataset (`prepare` is not defined in this notebook, presumably
# it comes from the `utils` star-import), then swap the labels for type vectors.
# NOTE: this rebinds `train_ds`, so the cell is not idempotent when re-run.
train_ds = prepare(train_ds, shuffle=True, augment=True, GAN=True).map(
    _attach_type_labels)

# From here on the models work with 3-channel images.
channels = 3
input_shape = (width, height, channels)




In [12]:
# Visual sanity check: plot five samples from the prepared dataset.
plot_n_images(
    train_ds,
    5,
    class_names,
    GAN=True,
    size=width,
)


image shape :  (96, 96, 3)


In [13]:
# Define the encoder
def build_encoder(input_shape, latent_dim):
    """Build the convolutional encoder of the VAE.

    Four stride-2 3x3 convolutions (64, 128, 128, 256 filters), each followed
    by LeakyReLU(0.2) and batch normalisation, feed a 256-unit ReLU dense
    layer with batch normalisation. Two linear heads produce the mean and the
    log-variance of the latent Gaussian, and a Lambda layer draws one sample
    from it.

    Args:
        input_shape: shape of one input image, without the batch axis.
        latent_dim: size of the latent vector.

    Returns:
        Keras Model named 'encoder' whose outputs are [z_mean, z_log_var, z].
    """
    input_encoder = Input(shape=input_shape, name='input_encoder')

    # Convolutional feature extractor: each stage halves the spatial size.
    features = input_encoder
    for n_filters in (64, 128, 128, 256):
        features = Conv2D(n_filters, kernel_size=3, strides=2,
                          padding='same')(features)
        features = LeakyReLU(alpha=0.2)(features)
        features = BatchNormalization()(features)

    features = Flatten()(features)
    features = Dense(256, activation='relu')(features)
    features = BatchNormalization()(features)

    # Parameters of the approximate posterior.
    z_mean = Dense(latent_dim, name='z_mean')(features)
    z_log_var = Dense(latent_dim, name='z_log_var')(features)

    def sampling(args):
        """Draw z = mean + exp(log_var / 2) * eps with eps ~ N(0, 1)."""
        mean, log_var = args
        n_rows = K.shape(mean)[0]
        epsilon = K.random_normal(
            shape=(n_rows, latent_dim), mean=0., stddev=1.)
        std = K.exp(log_var / 2)
        return mean + std * epsilon

    z = Lambda(sampling, output_shape=(latent_dim,),
               name='z')([z_mean, z_log_var])

    return Model(input_encoder, [z_mean, z_log_var, z], name='encoder')


# Define the decoder/generator
def build_decoder(latent_dim, image_width=None, out_channels=3):
    """Build the decoder / generator that maps a latent vector to an image.

    The latent vector is projected by a dense layer and reshaped to a
    (image_width/16, image_width/16, 128) feature map. Four stride-2
    transposed convolutions (256, 256, 128, 128 filters, LeakyReLU 0.2)
    upsample it by a factor of 16, and a final 3x3 convolution with tanh
    produces the image. The output is always square.

    Args:
        latent_dim: size of the latent input vector.
        image_width: side length of the generated image. Must be a positive
            multiple of 16. When ``None`` (the default) the notebook-level
            `width` is used, which is what the original implementation read
            implicitly.
        out_channels: number of channels of the generated image (default 3).

    Returns:
        Keras Model named 'decoder'.

    Raises:
        ValueError: if the image width is not a positive multiple of 16, in
            which case the four upsampling stages could not reproduce it.
    """
    if image_width is None:
        image_width = width
    if image_width <= 0 or image_width % 16 != 0:
        raise ValueError(
            f"image_width must be a positive multiple of 16, got {image_width}")

    # Define the noise vector input
    in_lat = Input(shape=(latent_dim,))

    # Project the latent vector onto a small feature map
    n = 128
    size = image_width // 16
    dense = Dense(n * size * size)(in_lat)
    upsampled = Reshape((size, size, n))(dense)

    # Upsample the feature maps: each stage doubles the spatial size
    for n_filters in (256, 256, 128, 128):
        upsampled = Conv2DTranspose(
            n_filters, (4, 4), strides=(2, 2), padding='same')(upsampled)
        upsampled = LeakyReLU(alpha=0.2)(upsampled)

    # Output layer; tanh keeps the values in [-1, 1]
    out_layer = Conv2D(
        out_channels, (3, 3), activation='tanh', padding='same')(upsampled)

    return Model(in_lat, out_layer, name='decoder')


In [14]:
# Define the discriminator
def build_discriminator(input_shape, latent_dim):
    """Build the discriminator that scores (image, latent vector) pairs.

    The image goes through four stride-2 3x3 convolutions (64, 128, 256, 512
    filters), each followed by LeakyReLU(0.2) and Dropout(0.25). The latent
    vector goes through four dense layers (64, 128, 256, 256 units) with
    LeakyReLU(0.2). Both feature vectors are concatenated, passed through
    Dropout(0.4) and reduced to one sigmoid output.

    Args:
        input_shape: shape of one image, without the batch axis.
        latent_dim: size of the latent vector paired with the image.

    Returns:
        Keras Model named 'discriminator' with inputs [image, latent] and a
        single sigmoid output.
    """
    # Both inputs
    img_input = Input(shape=input_shape)
    enc_input = Input(shape=(latent_dim,))

    # Image branch
    img_features = img_input
    for n_filters in (64, 128, 256, 512):
        img_features = Conv2D(n_filters, (3, 3), strides=(2, 2),
                              padding='same')(img_features)
        img_features = LeakyReLU(alpha=0.2)(img_features)
        img_features = Dropout(0.25)(img_features)
    img_features = Flatten()(img_features)

    # Latent-vector branch
    latent_features = enc_input
    for n_units in (64, 128, 256, 256):
        latent_features = Dense(n_units)(latent_features)
        latent_features = LeakyReLU(alpha=0.2)(latent_features)
    latent_features = Flatten()(latent_features)

    # Joint head
    merged = Concatenate()([img_features, latent_features])
    merged = Dropout(0.4)(merged)
    validity = Dense(1, activation='sigmoid')(merged)

    return Model(inputs=[img_input, enc_input],
                 outputs=validity, name='discriminator')


In [15]:
# Build one instance of each network.
# NOTE(review): the compile cell below builds E, G and D again with the same
# arguments; in the cells visible here `decoder`, `encoder` and
# `discriminator` are not used afterwards. Confirm before removing, since
# building them also advances Keras' automatic layer-name counters.
decoder = build_decoder(latent_dim)
encoder = build_encoder(input_shape, latent_dim)
discriminator = build_discriminator(input_shape, latent_dim)


In [16]:
# Compile the models
from keras.optimizers import SGD, Adam


def _new_sgd():
    """Return a fresh SGD optimizer with the notebook's learning rate."""
    return SGD(learning_rate=0.0003)


# Optimizers kept under their original names for the cells further down.
SGDop = _new_sgd()
ADAMop = Adam(learning_rate=0.0002)  # NOTE(review): unused in the visible cells

# Every model gets its OWN optimizer instance. An optimizer tracks the
# variables of the first model it updates; recent Keras versions raise an
# error when the same instance is later asked to update another model's
# variables.

# encoder
E = build_encoder(input_shape, latent_dim)
E.compile(optimizer=_new_sgd(), loss='mse')
E.summary()
# generator/decoder
G = build_decoder(latent_dim)
G.compile(optimizer=_new_sgd(), loss='mse')
G.summary()
# discriminator
D = build_discriminator(input_shape, latent_dim)
D.compile(optimizer=_new_sgd(), loss='mse')
D.summary()


Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_encoder (InputLayer)     [(None, 96, 96, 3)]  0           []                               
                                                                                                  
 conv2d_9 (Conv2D)              (None, 48, 48, 64)   1792        ['input_encoder[0][0]']          
                                                                                                  
 leaky_re_lu_16 (LeakyReLU)     (None, 48, 48, 64)   0           ['conv2d_9[0][0]']               
                                                                                                  
 batch_normalization_5 (BatchNo  (None, 48, 48, 64)  256         ['leaky_re_lu_16[0][0]']         
 rmalization)                                                                               

In [17]:
# Symbolic image input shared by the combined graphs below.
X = Input(shape=input_shape)

# Run the encoder ONCE. Calling E(X) twice adds a second copy of the encoder
# to the graph and, because sampling is random, makes `latent_rep` and `Z`
# two independent draws instead of the same latent code.
E_mean, E_logsigma, Z = E(X)
latent_rep = Z


In [18]:
# Reconstruct the input image from the sampled latent code.
output = G(Z)

# Discriminator scores for the (reconstruction, code) pair and for the
# (input image, code) pair.
D_fake, D_true = D([output, Z]), D([X, latent_rep])


In [19]:
from keras import metrics, backend as K

# End-to-end autoencoder: image -> sampled latent code -> reconstruction.
VAE = Model(X, output)

# KL divergence between the encoder's Gaussian and a standard normal,
# one value per sample (sum over the latent axis).
kl = - 0.5 * K.sum(1 + E_logsigma - K.square(E_mean) -
                   K.exp(E_logsigma), axis=-1)

# Reconstruction term. Despite its name this is a mean squared error, not a
# cross-entropy: both tensors are flattened to 1-D, so it is a single value
# for the whole batch, scaled by 64 and broadcast onto the per-sample KL.
crossent = 64 * metrics.mse(K.flatten(X), K.flatten(output))
VAEloss = K.mean(crossent + kl)
VAE.add_loss(VAEloss)

# Dedicated optimizer with the same settings as SGDop. One optimizer instance
# should not update the variables of several different models; recent Keras
# versions raise an error when it does.
VAE.compile(optimizer=SGD(learning_rate=0.0003))


In [20]:
# Stacked model latent -> G -> D, used only to update the generator.
latent_rep = Input(shape=(latent_dim,))

# Freeze the discriminator inside the stacked model. Without this, every
# generator step (targets = 1 for generated images) would also push D's own
# weights towards calling fakes real, undoing its training.
# D was compiled earlier while trainable, and Keras keeps the trainable state
# captured at compile time, so D.train_on_batch still updates D.
# NOTE: D.trainable stays False afterwards; reset it before recompiling D.
D.trainable = False

fake_or_true = D([G(latent_rep), latent_rep])
GAN = Model(latent_rep, fake_or_true)

# Dedicated optimizer (same settings as SGDop): one optimizer instance should
# not be shared between models with different variables.
GAN.compile(optimizer=SGD(learning_rate=0.0003), loss='binary_crossentropy')


In [30]:
# Training schedule (same values the loop used as literals before).
N_EPOCHS = 25
GEN_TRAIN_EVERY = 4     # generator/encoder are updated on epochs 0, 4, 8, ...
CHECKPOINT_EVERY = 10   # intermediate weights are saved on epochs 0, 10, 20

for epoch in range(N_EPOCHS):
    # Reset the reported losses so the summary below only shows values
    # computed during THIS epoch. Previously it raised NameError when the
    # dataset yielded no batch, and it repeated stale generator/VAE losses on
    # epochs in which they were not trained.
    DlossTrue = DlossFake = GlossEnc = GlossGen = Eloss = None

    for i, batch in enumerate(train_ds):
        # Real samples. Column 0 of the labels is the constant 1 inserted by
        # map_labels, i.e. the "real" target for the discriminator.
        X_real, y_real_class = batch[0], batch[1][:, 0]
        half_batch = int(X_real.shape[0])  # number of samples in this batch
        latent_vect = E.predict(X_real, verbose=0)[-1]

        # Generate 'fake' examples from random latent vectors
        noise = tf.random.normal([half_batch, latent_dim])
        fakeImg = G.predict(noise, verbose=0)

        # Train the discriminator: (real image, encoded latent) -> 1,
        # (generated image, noise) -> 0
        DlossTrue = D.train_on_batch([X_real, latent_vect], y_real_class)
        DlossFake = D.train_on_batch(
            [fakeImg, noise], np.zeros((half_batch, 1)))

        # Train the generator (through the stacked GAN) and the VAE.
        # NOTE(review): the condition is on the epoch, not on the batch index,
        # so these updates run for whole epochs 0, 4, 8, ... - confirm that
        # this is the intended schedule.
        if epoch % GEN_TRAIN_EVERY == 0:
            GlossEnc = GAN.train_on_batch(
                latent_vect, np.ones((half_batch, 1)))
            GlossGen = GAN.train_on_batch(noise, np.ones((half_batch, 1)))
            Eloss = VAE.train_on_batch(X_real, None)

    # Periodic checkpoint
    if epoch % CHECKPOINT_EVERY == 0:
        D.save_weights(f'vaediscriminator{epoch}.h5')
        G.save_weights(f'vaegenerator{epoch}.h5')
        E.save_weights(f'vaeencoder{epoch}.h5')

    # Losses of the last batch of this epoch (None when not computed)
    print("epoch number", epoch + 1)
    print("loss:")
    print("D:", DlossTrue, DlossFake)
    print("G:", GlossEnc, GlossGen)
    print("VAE:", Eloss)

print('Training done,saving weights')
D.save_weights('vaediscriminator.h5')
G.save_weights('vaegenerator.h5')
E.save_weights('vaeencoder.h5')
print('end')


epoch number 1
loss:
D: 0.2678421139717102 0.24402441084384918
G: 0.6994256973266602 0.7021198272705078
VAE: 175.24673461914062
epoch number 1
loss:
D: 0.27213597297668457 0.24997669458389282
G: 0.7054152488708496 0.7017720937728882
VAE: 167.93971252441406
epoch number 1
loss:
D: 0.27350252866744995 0.2470567226409912
G: 0.6974077224731445 0.7009695768356323
VAE: 169.04281616210938
epoch number 1
loss:
D: 0.2680792808532715 0.24521134793758392
G: 0.7021411061286926 0.6975362300872803
VAE: 165.32888793945312
epoch number 1
loss:
D: 0.2646970748901367 0.247745543718338
G: 0.6952120065689087 0.690826416015625
VAE: 166.5557861328125
epoch number 1
loss:
D: 0.2631726861000061 0.25100189447402954
G: 0.6954878568649292 0.6896201372146606
VAE: 156.93505859375
epoch number 1
loss:
D: 0.2630050778388977 0.24975381791591644
G: 0.6953389644622803 0.6909964084625244
VAE: 145.1114501953125
epoch number 1
loss:
D: 0.25775331258773804 0.25022435188293457
G: 0.6930232048034668 0.6923444271087646
VAE: 1