<a href="https://colab.research.google.com/github/waynchi/SC-Net/blob/master/SC_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Install the `mnist` dataset helper package into the notebook runtime.
!pip install mnist
from google.colab import drive
# Mount Google Drive; checkpoints and logs further down are written under
# '/content/drive/My Drive'.
drive.mount('/content/drive')

In [0]:
import tensorflow as tf
# Ask TensorFlow whether a GPU is available; as the last expression of the
# cell, the result is displayed by the notebook.
tf.test.is_gpu_available()


# Experiment Notes

- 1 vs 2.5 upper bound (1 seems to work fine)
- Deeper vs more filters
  - 8 as lowest with 32 filters ( ~750k parameters) gave blurry results around 900 epochs for overfitting test of 2. Also, seems to have overfit on only one example.
  - 4 as lowest with 32 filters ( ~2.9M parameters) gave blurry results for overfitting test of 2 around 1000 epochs. 
  - 8 as lowest with 64 filters (~2.9M parameters) gave...
  - 4 as lowest with 64 filters (~11M parameters) gave.... Way too slow. Also accuracy was still only ~0.05 after 800 epochs on 100 samples
- The biggest issue is that the intensity accuracy stays around 0.05 which is far too low.

Debugging Steps

1. Does it work for pure orderless NADE?
2. Are the architecture parameters correct?
3. Are the input and two target images correct?
4. Are you able to overfit on one datapoint?
5. Two?
6. Are the loss functions appropriate?
7. What do the loss values look like?

### Why is my loss suddenly spiking?
1. Learning Rate -> Might need decay
2. Dropout is too high / in the wrong place
3. You might be introducing NaN in your data. Check with assert not np.any(np.isnan(x))
4. You might be having NaN due to your log loss (Solution: ??? Try softmax with logits?)

# What about a GAN + Self correcting U-Net ? That would make for a cool architecture
# Following CGAN -> adding a 1-hot vector encoding of the label to the training data
# Simulated Annealing?
# Generator -> VAE -> Discriminator?
# What about feeding in a discriminator's confidence level as a temperature during the autoregressive? Inverse confidence?
# What about a 3 dimensional GAN?
# What about adding attention to the model?

# Umut Notes
- Add a stop condition to the softmax
    - Tried both 2 outputs and just an extra variable to the softmax
    - 2 outputs fails due to it having too much weight to the loss and the loss fluctuates like crazy
    - extra variable fails as the probability is still small even for an original image. Not sure why. Maybe because each time we generate we use new random values, which causes the dataset to be imbalanced?
- 2-step process (pick note and then choose how much through binary cross entropy)

In [0]:
import mnist
import scipy.misc
from PIL import Image
from pprint import pprint
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from copy import deepcopy
from keras.datasets import cifar10


def make_grayscale(data, dtype=np.float32):
    """Collapse a batch of RGB images into a single luma channel.

    Args:
        data: array indexed as ``data[n, y, x, c]`` with channels 0, 1, 2
            holding red, green and blue.
        dtype: floating dtype used for the three channel weights.

    Returns:
        ``uint8`` array with the same first three axes as ``data`` and a
        trailing channel axis of length 1. The weighted sum is truncated
        (not rounded) by the final cast.
    """
    # Luma weights (weighted average used in video systems), kept as 0-d
    # arrays of the requested dtype.
    w_red, w_green, w_blue = (np.asarray(w, dtype=dtype) for w in (.3, .59, .11))
    red = data[:, :, :, 0]
    green = data[:, :, :, 1]
    blue = data[:, :, :, 2]
    luma = w_red * red + w_green * green + w_blue * blue
    # Re-introduce the channel axis before casting back to 8-bit.
    return np.expand_dims(luma, axis=3).astype(np.uint8)

def create_image(image, name, image_shape, is_grayscale=False):
    """Save ``image`` to ``name`` as an 8-bit PIL image and return it.

    Args:
        image: array-like holding the pixel values; it is reshaped to
            ``image_shape`` and length-1 axes are dropped. Not modified.
        name: output file path handed to ``Image.save``.
        image_shape: shape to view ``image`` in before squeezing.
        is_grayscale: force single-channel ('L') output.

    Returns:
        The ``PIL.Image.Image`` that was written.
    """
    # ``astype`` already returns a fresh array, so no explicit copy is needed.
    img_arr = np.squeeze(np.asarray(image).reshape(image_shape)).astype(np.uint8)

    # A squeezed 2-D array can only be a single-channel image, so fall back
    # to 'L' even when the caller did not pass ``is_grayscale`` (several
    # callers in this notebook omit it).
    mode = 'L' if (is_grayscale or img_arr.ndim == 2) else 'RGB'
    img = Image.fromarray(img_arr, mode)
    img.save(name)
    return img

# --- Experiment switches -------------------------------------------------
# is_single: small overfitting run on a handful of images vs. full dataset.
is_single = True
is_grayscale = False  # False for Color
# is_cifar_10: load CIFAR-10 through Keras; otherwise MNIST via `mnist`.
is_cifar_10 = True

# num_samples: how many images are kept (see the slice below).
# epochs_per_sample: read by EvaluateCallback to decide on which epochs
#   sample images are generated.
# num_sub_layers: passed to unet_model as the number of extra
#   encoder/decoder levels.
if is_single:
    num_samples = 10
    epochs_per_sample = 1000
    num_sub_layers = 1
else:
    num_samples = 60000
    epochs_per_sample = 20
    num_sub_layers = 1

if is_cifar_10:
    # Only the training split is used; labels are loaded but unused below.
    (images, labels), (_, _) = cifar10.load_data()
    if is_grayscale:
        images = make_grayscale(images)
else:
    images = mnist.train_images()

    # np.random.shuffle(images)
# Keep only the first num_samples images.
images = images[:num_samples, :, :]

# pprint(images)
print(images.shape)

# labels = mnist.train_labels()
# n_labels = np.max(labels) + 1
# labels = np.eye(n_labels)[labels]
# print(labels.shape)

# image_shape is the per-image shape used everywhere below. In the
# non-CIFAR branch a trailing channel axis of length 1 is appended.
if is_cifar_10:
    image_shape = images[0].shape
else:
    image_shape = np.expand_dims(images[0], axis=-1).shape 

# Sanity check: dump the first and last selected image to disk.
print(images[0])
create_image(images[0], 'my.png', image_shape, is_grayscale=is_grayscale)
create_image(images[-1], 'my2.png', image_shape, is_grayscale=is_grayscale)
print(image_shape)

In [0]:
def to_one_hot(arr, n_values=256):
    """One-hot encode integer intensities along a new trailing axis.

    Args:
        arr: array-like of intensities. Values are cast to ``uint8`` first
            (so they wrap into 0..255); the input is not modified.
        n_values: number of classes, i.e. length of the new last axis.

    Returns:
        ``uint8`` array of shape ``arr.shape + (n_values,)`` with a single 1
        per position.

    Raises:
        IndexError: if a (cast) value is >= ``n_values``.
    """
    indices = np.asarray(arr).astype(np.uint8)
    # Build the identity directly as uint8 so the (potentially large)
    # one-hot result is never materialized in float64.
    return np.eye(n_values, dtype=np.uint8)[indices]

# Sanity check: one-hot encode the first image and decode it again.
one_hot = to_one_hot(images[0])
print(one_hot.shape)

# argmax over the class axis recovers the original intensities.
argmax_res = np.argmax(one_hot, axis=-1)
print(argmax_res)


In [0]:
import keras
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Conv2DTranspose, BatchNormalization, Dropout, Flatten, Dense, Softmax, Reshape, Activation
from keras.optimizers import Adam
from keras.utils import plot_model
from keras import backend as K
from tensorflow.python.ops import math_ops

def built_in_softmax_kl_loss(target, output):
    """KL divergence between the normalized target map and softmax(output).

    Both tensors are flattened completely (including the batch axis), the
    target is scaled to sum to 1 and the raw outputs are passed through a
    softmax, then Keras' KL divergence is applied.

    Args:
        target: non-negative target tensor.
        output: raw (linear) network output with the same number of elements.

    Returns:
        The Keras KL-divergence tensor.
    """
    # Work in the prediction's dtype so the epsilon below can be added even
    # if the target arrives as an integer tensor.
    target = K.flatten(K.cast(target, K.dtype(output)))
    output = K.flatten(output)

    # Guard the normalization: a target with no positive entries would
    # otherwise divide by zero and poison the loss with NaN.
    target = target / (K.sum(target) + K.epsilon())
    output = K.softmax(output)
    return keras.losses.kullback_leibler_divergence(target, output)

def intensity_softmax_loss(target, output):
    """Categorical cross-entropy on raw logits.

    ``output`` is treated as unnormalized logits (``from_logits=True``), so
    the network head must not apply a softmax itself.
    """
    cross_entropy = keras.losses.categorical_crossentropy(
        target,
        output,
        from_logits=True,
    )
    return cross_entropy

# Attach the custom losses to the keras.losses module under their own names.
# NOTE(review): presumably so keras.models.load_model can resolve them by
# name when a checkpoint is reloaded — confirm against the Keras version used.
keras.losses.built_in_softmax_kl_loss = built_in_softmax_kl_loss
keras.losses.intensity_softmax_loss = intensity_softmax_loss

def conv_layer(n_filters, filter_size, conv):
    """Apply three stacked same-padded ReLU convolutions to ``conv``.

    Args:
        n_filters: number of filters of each convolution.
        filter_size: kernel size passed to ``Conv2D``.
        conv: input tensor.

    Returns:
        The output tensor of the third convolution.
    """
    tensor = conv
    for _ in range(3):
        tensor = Conv2D(n_filters, filter_size, activation='relu', padding='same')(tensor)
    return tensor
 
def unet_model(input_size=(28, 28, 1), n_filters_start=32, growth_factor=2,
               upconv=False, is_grayscale=True, num_sub_layers=1):
    """Build and compile the two-headed U-Net.

    The network has ``1 + num_sub_layers`` pooling steps on the way down and
    the same number of upsampling steps (with skip concatenations) on the
    way up. It produces two outputs:

    * ``softmax_out``: one linear logit per pixel, trained with
      ``built_in_softmax_kl_loss``.
    * an intensity head with 256 logits per pixel (grayscale) or per pixel
      and channel (colour), trained with ``intensity_softmax_loss``.

    Args:
        input_size: ``(height, width, channels)`` of one input image.
            NOTE(review): height and width presumably need to be divisible
            by ``2 ** (num_sub_layers + 1)`` for the skip concatenations to
            line up — confirm.
        n_filters_start: filters of the first convolution stack.
        growth_factor: filter multiplier per encoder level.
        upconv: use ``Conv2DTranspose`` instead of ``UpSampling2D``.
        is_grayscale: selects the single-channel intensity head.
        num_sub_layers: number of intermediate encoder/decoder levels; must
            be at least 1 because the first decoder step uses the last
            intermediate skip connection.

    Returns:
        The compiled Keras ``Model`` (its summary is printed as well).
    """
    droprate = 0.5

    def upsample_and_merge(tensor, skip, filters):
        # Double the spatial resolution, then concatenate the skip connection.
        if upconv:
            upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(tensor)
        else:
            upsampled = UpSampling2D(size=(2, 2))(tensor)
        return concatenate([upsampled, skip])

    # ---- Encoder ----
    n_filters = n_filters_start
    inputs = Input(input_size)
    conv_first = conv_layer(n_filters, (3, 3), inputs)
    prev_pool = MaxPooling2D(pool_size=(2, 2))(conv_first)

    hidden_layers = []
    for _ in range(num_sub_layers):
        n_filters *= growth_factor
        pool = BatchNormalization()(prev_pool)
        conv = conv_layer(n_filters, (3, 3), pool)
        pool = MaxPooling2D(pool_size=(2, 2))(conv)
        prev_pool = Dropout(droprate)(pool)
        hidden_layers.append(conv)

    # ---- Bottleneck ----
    n_filters *= growth_factor
    conv_mid = conv_layer(n_filters, (3, 3), prev_pool)

    # ---- Decoder ----
    n_filters //= growth_factor
    up_first = upsample_and_merge(conv_mid, hidden_layers[-1], n_filters)
    up_first = BatchNormalization()(up_first)
    prev_conv = conv_layer(n_filters, (3, 3), up_first)
    prev_conv = Dropout(droprate)(prev_conv)

    for i in range(num_sub_layers - 1):
        n_filters //= growth_factor
        # Walk the stored skip connections from deepest to shallowest.
        up = upsample_and_merge(prev_conv, hidden_layers[-i - 2], n_filters)
        up = BatchNormalization()(up)
        conv = conv_layer(n_filters, (3, 3), up)
        prev_conv = Dropout(droprate)(conv)

    n_filters //= growth_factor
    up_last = upsample_and_merge(prev_conv, conv_first, n_filters)
    conv_last = conv_layer(n_filters, (3, 3), up_last)

    # ---- Output heads (both emit raw logits; the losses apply softmax) ----
    softmax_out = Conv2D(1, 1, activation='linear', name='softmax_out')(conv_last)

    # The reshapes are derived from ``input_size`` rather than from a
    # module-level variable so the function honours its own argument.
    if is_grayscale:
        intensity_out = Conv2D(256, 1, padding='valid', name='sigmoid_out')(conv_last)
        intensity_out = Reshape((*input_size[:-1], 256))(intensity_out)
    else:
        n_channels = input_size[-1]
        intensity_out = Conv2D(256 * n_channels, 1, padding='valid', name='intensity_conv')(conv_last)
        intensity_out = Reshape((*input_size, 256))(intensity_out)

    model = Model(inputs=inputs, outputs=[softmax_out, intensity_out])
    model.compile(optimizer=Adam(lr=0.001), loss=[built_in_softmax_kl_loss, intensity_softmax_loss], metrics=['categorical_accuracy'])

    model.summary()
    return model

In [0]:
# Build the network once here (this also prints its summary); the training
# cell further down builds a fresh model again.
model = unet_model(input_size=image_shape, is_grayscale=is_grayscale, num_sub_layers=num_sub_layers)

In [0]:
# discriminator_model = discriminator(input_size=image_shape)

In [0]:
import itertools
import math
import random
import time
import traceback
from copy import deepcopy

# Upper bound, in percent of the pixels, for the random-noise fraction drawn
# in mask_image_with_noise; also embedded in the checkpoint/log names.
noise_upper_bound = 2.5

def mask_image_with_noise(image, is_grayscale=True, max_noise_percentage=None):
    """Corrupt ``image`` by blanking random pixels and randomizing others.

    A random fraction of the pixels (uniform in 0-100 %) is set to zero and a
    second random fraction (uniform in 0-``max_noise_percentage`` %) is
    overwritten with random 8-bit intensities. Pixels that are zero in every
    channel are never selected. The two selections may overlap; the noise
    value then wins because it is written last.

    Args:
        image: array of shape ``(..., channels)``. It is not modified.
        is_grayscale: kept for backward compatibility. Pixels are handled as
            rows of ``image.shape[-1]`` channel values, so single-channel and
            multi-channel images share one code path.
        max_noise_percentage: upper bound (percent) for the noise fraction.
            ``None`` uses the module-level ``noise_upper_bound``.

    Returns:
        ``(output_image, xor_target)``: the corrupted copy with the shape
        and dtype of ``image``, and a boolean array of shape
        ``image.shape[:-1] + (1,)`` that is True where a pixel was blanked
        or replaced by noise.
    """
    if max_noise_percentage is None:
        max_noise_percentage = noise_upper_bound

    image = np.asarray(image)
    # Capture the layout up front so the flattened working arrays can be
    # restored to it at the end.
    spatial_shape = image.shape[:-1]
    n_channels = image.shape[-1]
    pixel_count = int(np.prod(spatial_shape))

    sampling_percentage_mask = np.random.uniform(0, 100)
    sampling_percentage_noise = np.random.uniform(0, max_noise_percentage)

    # Mark the first k entries, then shuffle: selects exactly k random pixels.
    mask = np.zeros(pixel_count, dtype=bool)
    mask[:math.floor(pixel_count * (sampling_percentage_mask / 100.0))] = True
    noise = np.zeros(pixel_count, dtype=bool)
    noise[:math.floor(pixel_count * (sampling_percentage_noise / 100.0))] = True
    np.random.shuffle(mask)
    np.random.shuffle(noise)

    # One row per pixel; ``copy`` keeps the caller's array untouched.
    pixels = image.reshape(pixel_count, n_channels).copy()

    # Take into account the values that are already 0: they are neither
    # blanked nor replaced, and therefore never flagged in the target.
    already_empty = np.sum(pixels, axis=-1) == 0
    mask[already_empty] = False
    noise[already_empty] = False

    pixels[mask] = 0

    # Draw only as many random intensities as there are noisy pixels.
    n_noisy = int(np.sum(noise))
    random_values = np.random.uniform(0, 1, (n_noisy, n_channels)) * 255
    pixels[noise] = np.around(random_values).astype(np.uint8)

    output_image = pixels.reshape(image.shape)
    xor_target = (mask | noise).reshape(spatial_shape + (1,))
    return output_image, xor_target


class ImageGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that produces corrupted images and their targets.

    Every batch is generated on the fly: each source image is corrupted
    ``samples_per_data_item`` times with ``mask_image_with_noise``. A batch is
    ``(inputs, [xor_targets, one_hot_originals])``, matching the two outputs
    of the model.
    """

    def __init__(self, sample_list, image_shape, batch_size, samples_per_data_item, stops_per_data_item, is_grayscale=True, seed=None):
        """Store the configuration.

        Args:
            sample_list: indexable collection of source images.
            image_shape: shape every source image is reshaped to.
            batch_size: minimum number of samples per generated batch.
            samples_per_data_item: corrupted variants generated per image.
            stops_per_data_item: only used when computing the number of
                batches per epoch and the validation set size.
            is_grayscale: forwarded to ``mask_image_with_noise`` and used to
                pick the PIL mode in ``save_image``.
            seed: stored but currently unused.
        """
        super().__init__()
        print("sample_list: {}".format(len(sample_list)))
        self.sample_list = sample_list
        self.image_shape = image_shape
        self.batch_size = batch_size
        self.samples_per_data_item = samples_per_data_item
        self.stops_per_data_item = stops_per_data_item
        self.is_grayscale = is_grayscale
        # Position of the next source image; wraps around the sample list.
        self.sample_index = 0
        self.seed = seed
        self.dtype = np.uint8
        # if self.seed is not None:
        #     np.random.seed(self.seed)

    def generate_training_pairs(self):
        '''
        Generates Training Pairs till @training_input / @training_target have @batch_size files.

        Returns (training_input, training_target, training_original) as
        arrays. Because a whole group of ``samples_per_data_item`` variants is
        appended per source image, the result may contain more than
        ``batch_size`` entries.

        Raises ValueError if no samples could ever be produced and
        RuntimeError if every source image fails in a row.
        '''
        if self.batch_size > 0 and self.samples_per_data_item <= 0:
            # Nothing would ever be appended and the loop below could not end.
            raise ValueError('samples_per_data_item must be positive to fill a batch')

        training_input = []
        training_original = []
        training_target = []
        consecutive_failures = 0
        while len(training_input) < self.batch_size:
            original_image = np.array(self.sample_list[self.sample_index])
            original_image = original_image.reshape(self.image_shape).astype(self.dtype)
            self.sample_index = (self.sample_index + 1) % len(self.sample_list)
            try:
                # augment by adding and removing random values in the array
                for _ in range(self.samples_per_data_item):
                    input_image, xor_target = mask_image_with_noise(original_image, is_grayscale=self.is_grayscale)
                    one_hot_original = to_one_hot(np.squeeze(original_image))

                    # Append only once all three pieces exist so the lists
                    # can never get out of step.
                    training_input.append(input_image.astype(self.dtype))
                    training_original.append(one_hot_original)
                    training_target.append(xor_target.astype(self.dtype))
                consecutive_failures = 0
            except Exception as e:
                # Best effort: skip the offending image and move on.
                print('Error generating input and target pair')
                traceback.print_exc()
                consecutive_failures += 1
                if consecutive_failures >= len(self.sample_list):
                    # Every image failed in a row; retrying would loop forever.
                    raise RuntimeError('Could not generate a training pair from any sample') from e
        training_input = np.asarray(training_input)
        training_target = np.asarray(training_target)
        training_original = np.asarray(training_original)
        return training_input, training_target, training_original

    def save_image(self, img_arr, img_name, is_target=False):
        """Print debug information about ``img_arr`` and save it as an image.

        Targets (``is_target=True``) hold 0/1 values and are scaled to 0/255
        and saved as single-channel images.
        """
        # img_arr = img_arr.reshape(self.image_shape)
        print(img_name)
        print("img shape: {}. img sum: {}".format(img_arr.shape, img_arr.sum()))
        img_arr = np.squeeze(img_arr)
        print("img shape: {}. img sum: {}".format(img_arr.shape, img_arr.sum()))
        print(img_arr)
        #pprint(img_arr)
        print("img shape: {}. img sum: {}".format(img_arr.shape, img_arr.sum()))
        if self.is_grayscale or is_target:
            if is_target:
                # Not in place, so the caller's array is left untouched.
                img_arr = img_arr * 255
            img = Image.fromarray(img_arr.astype(np.uint8), 'L')
        else:
            img = Image.fromarray(img_arr.astype(np.uint8), 'RGB')

        img.save(img_name)

    def get_random_training_pair(self):
        """Generate one batch and dump a random input/target/original triple
        to 'training_input.png', 'training_target.png' and
        'training_original.png' for visual inspection."""
        training_input, training_target, training_original = self.generate_training_pairs()
        print("training_input shape: {}".format(training_input.shape))
        index = random.randrange(0, len(training_input))
        self.save_image(deepcopy(training_input[index]), 'training_input.png')
        self.save_image(deepcopy(training_target[index]), 'training_target.png', is_target=True)
        print(training_original.shape)
        # Decode the one-hot original back to intensities.
        original_image = np.argmax(deepcopy(training_original[index]), axis=-1)
        print(original_image.shape)
        original_image = np.expand_dims(original_image, axis=-1)
        self.save_image(original_image, 'training_original.png')

    def generate_validation_samples(self):
        """Generate one fixed batch covering the whole sample list.

        Returns ``(inputs, [xor_targets, one_hot_originals])``. With an empty
        sample list the arrays are empty.
        """
        old_batch_size = self.batch_size
        self.batch_size = len(self.sample_list) * (self.samples_per_data_item + self.stops_per_data_item)
        try:
            training_input, training_target, training_original = self.generate_training_pairs()
        finally:
            # Restore the configured batch size even if generation failed.
            self.batch_size = old_batch_size
        return training_input, [training_target, training_original]

    def __getitem__(self, index):
        '''Generates 1 batch of data'''
        # ``index`` is ignored: batches are generated from the running
        # ``sample_index`` cursor instead.
        training_input, training_target, training_original = self.generate_training_pairs()
        return training_input, [training_target, training_original]

    def __len__(self):
        '''Number of batches / epoch'''
        samples_to_generate = int(
            (len(self.sample_list) * (self.samples_per_data_item + self.stops_per_data_item)) /
            self.batch_size)
        return samples_to_generate
    
    # def on_epoch_begin(self):
    #     if self.seed is not None:
    #         np.random.seed(self.seed)
    #     else:
    #         np.random.seed(time.time())


In [0]:
# Config
# stops_per_data_item: only enters the batches-per-epoch arithmetic; no
# "stop" samples are generated in this notebook.
stops_per_data_item = 0
if is_single:
    # Overfitting run: every image is corrupted 32 times and all of them
    # fit into a single batch; no validation split (split = 1).
    batch_size = num_samples * 32
    samples_per_data_item = 1 * 32
    split = 1
else:
    # Full run: one corrupted variant per image, 90 % / 10 % split.
    batch_size = 128
    samples_per_data_item = 1
    split = 0.9

In [0]:
# Split the images by position: the first `split` fraction is used for
# training, the remainder for validation (empty when split == 1).
training_samples = images[:int(len(images) * split)]
validation_samples = images[int(len(images) * split):]

print("training samples: {}. validation samples: {}".format(len(training_samples), len(validation_samples)))

# Same arithmetic as ImageGenerator.__len__.
steps_per_epoch = int(len(training_samples) * (samples_per_data_item + stops_per_data_item) / batch_size)
print("steps per epoch: {}".format(steps_per_epoch))

# pprint(training_samples[0])

training_generator = ImageGenerator(
    sample_list=training_samples,
    image_shape=image_shape,
    batch_size=batch_size,
    samples_per_data_item=samples_per_data_item,
    stops_per_data_item=stops_per_data_item,
    is_grayscale=is_grayscale)

validation_generator = ImageGenerator(
    sample_list=validation_samples,
    image_shape=image_shape,
    batch_size=batch_size,
    samples_per_data_item=samples_per_data_item,
    stops_per_data_item=stops_per_data_item,
    is_grayscale=is_grayscale)

# Materialize the validation set once so it stays fixed across epochs.
# With an empty validation_samples list this yields empty arrays.
validation_data = validation_generator.generate_validation_samples()

# print("validation data input and target shape: {}".format(validation_data[0].shape))




In [0]:
# Dump one random (input, target, original) triple to PNG files for a
# visual check. Note that this advances the generator's sample cursor.
training_generator.get_random_training_pair()

In [0]:
# Suffix that distinguishes overfitting runs from full-dataset runs.
if is_single:
    is_single_text = "single"
else:
    is_single_text = "full"

# NOTE(review): the custom name says "grayscale" although is_grayscale is
# set to False above — confirm which is intended before comparing runs.
model_custom_name = 'cifar-grayscale-double-softmax'
# The full name encodes the main hyper-parameters and is used for both the
# checkpoint file and the log directory on Google Drive.
model_full_name = '{}-num-samples-{}-noise-upper-{}-num-sub-layers-{}-{}'.format(model_custom_name, num_samples, noise_upper_bound, num_sub_layers, is_single_text)
model_location = '/content/drive/My Drive/checkpoints/{}.hdf5'.format(model_full_name)
log_dir = '/content/drive/My Drive/logs/{}'.format(model_full_name)
print(log_dir)
print(model_location)

In [0]:
# updatable plot
# a minimal example
from IPython.display import clear_output

class PlotLosses(keras.callbacks.Callback):
    """Keras callback that redraws the loss curves after every epoch."""

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of training."""
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []

        self.fig = plt.figure()

        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's losses and replot both curves.

        ``val_loss`` is recorded as ``None`` when the logs do not contain it.
        """
        # Avoid a shared mutable default argument.
        logs = {} if logs is None else logs

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.i += 1

        # Replace the previous plot in the notebook output instead of
        # stacking a new one below it.
        clear_output(wait=True)
        plt.plot(self.x, self.losses, label="loss")
        plt.plot(self.x, self.val_losses, label="val_loss")
        plt.legend()
        # plt.xscale('log')
        plt.show()
        
# Shared instance, passed to model.fit in the non-single training branch.
plot_losses = PlotLosses()

In [0]:
import os
import shutil
import time

class EvaluateCallback(keras.callbacks.Callback):
    """Keras callback that periodically samples images from the model.

    Every ``epochs_per_sample`` epochs (module-level setting) a 2x2 grid of
    generated images is written to ``sample_dir``.
    """

    def __init__(self, image_shape, sample_dir):
        """
        Args:
            image_shape: ``(height, width, channels)`` of one image.
            sample_dir: directory the sample grids are saved to.
        """
        super().__init__()
        self.image_shape = image_shape
        self.sample_dir = sample_dir

    def on_epoch_end(self, epoch, logs=None):
        """Generate and save a grid of samples on every sampling epoch."""
        if epoch % epochs_per_sample != 0:
            return

        sample_sqrt = 2
        generated_images = []
        for i in range(sample_sqrt ** 2):
            directory = "images_{}".format(i)
            os.makedirs(directory, exist_ok=True)
            # Sample from the model this callback is attached to rather than
            # from whatever a global name happens to point at.
            img, _ = self.inference(self.model, self.generate_noise(), directory, 3000)
            generated_images.append(img)

        # PIL sizes and offsets are (width, height); array shapes are
        # (height, width, channels).
        tile_height, tile_width = self.image_shape[0], self.image_shape[1]
        final_im = Image.new('RGB', (tile_width * sample_sqrt, tile_height * sample_sqrt))
        for row in range(sample_sqrt):
            for col in range(sample_sqrt):
                tile = generated_images[row * sample_sqrt + col]
                final_im.paste(tile, (col * tile_width, row * tile_height))

        os.makedirs(self.sample_dir, exist_ok=True)
        final_im.save(os.path.join(self.sample_dir, 'samples_epoch_{}.png'.format(epoch)))

    def generate_noise(self):
        """Return an all-zero ``uint8`` batch containing a single image."""
        input_image = np.full(self.image_shape, 0)
        input_image = input_image.astype(np.uint8)
        input_image = np.expand_dims(input_image, 0)
        return input_image

    @staticmethod
    def _sample_from_logits(logits):
        """Draw one index from softmax(``logits``) for a 1-D logit vector."""
        # Subtract the maximum before exponentiating for numerical stability.
        probs = np.exp(logits - np.max(logits))
        probs = probs / np.sum(probs)
        return np.random.choice(probs.shape[0], p=probs)

    def inference(self, model, input_image, directory, iterations):
        """Iteratively rewrite one pixel at a time for ``iterations`` steps.

        Each step samples a pixel position from the model's first output and
        then a new intensity for that pixel (per channel in colour mode) from
        its second output. The module-level ``is_grayscale`` flag selects the
        layout.

        Args:
            model: model with the two outputs described above.
            input_image: starting batch of shape ``(1, *image_shape)``; it is
                not modified.
            directory: existing directory that receives 'final.png'.
            iterations: number of single-pixel updates.

        Returns:
            ``(img, final_image)``: the saved PIL image and a copy of the
            final array.
        """
        working_image = deepcopy(input_image)

        for _ in range(iterations):
            position_logits, intensity_logits = model.predict(working_image)
            index = self._sample_from_logits(position_logits.flatten())

            if is_grayscale:
                intensity_logits = intensity_logits.reshape(-1, 256)
                working_image = working_image.flatten()
                working_image[index] = self._sample_from_logits(intensity_logits[index])
            else:
                intensity_logits = intensity_logits.reshape(-1, 3, 256)
                working_image = working_image.reshape(-1, 3)
                for channel in range(3):
                    working_image[index, channel] = self._sample_from_logits(intensity_logits[index, channel])

            working_image = np.reshape(working_image, [1, *self.image_shape])

        final_image = working_image
        img = create_image(final_image, os.path.join(directory, 'final.png'),
                           image_shape=self.image_shape, is_grayscale=is_grayscale)
        return img, deepcopy(final_image)

In [0]:
import os

# Start from a freshly initialized network.
model = unet_model(input_size=image_shape, is_grayscale=is_grayscale, num_sub_layers=num_sub_layers)

# Set to True to continue from the checkpoint at model_location instead.
resume_training = False
if resume_training:
    model = keras.models.load_model(model_location)

# Writes the full model (not just the weights) to model_location; with
# save_best_only=False the file is overwritten on every save.
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=model_location,
    monitor='val_loss',
    save_weights_only=False,
    verbose=1,
    mode='min',
    save_best_only=False)

tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, update_freq='epoch')
# Sample grids are written into the TensorBoard log directory.
evaluate_callback = EvaluateCallback(image_shape, log_dir)

if True:
    if is_single:
        # Overfitting run: no validation data and no live loss plot.
        history = model.fit(
            training_generator,
            # validation_data=validation_data,
            verbose=1,
            shuffle=True,
            steps_per_epoch=steps_per_epoch,
            epochs=30000,
            callbacks=[model_checkpoint_callback, evaluate_callback, tensorboard_callback])#, tensorboard_callback])
    else:
        # Full run: validates against the pre-generated validation set.
        history = model.fit(
            training_generator,
            validation_data=validation_data,
            verbose=1,
            shuffle=True,
            steps_per_epoch=steps_per_epoch,
            epochs=1000,
            callbacks=[model_checkpoint_callback, plot_losses, tensorboard_callback, evaluate_callback])#, tensorboard_callback])
    #epochs=cfg.epochs,
    #callbacks=callbacks)
# model.save("sc-model.hdf5")

Current experiment: 10 data items Only 30% accuracy T.T.
10000 epochs and 40% accuracy... Grayscale was able to hit 80% accuracy at 50000 epochs. Loss oscillates a lot.... maybe I need LR decay? Maybe a larger model?

# Experiments

I tried changing from softmax + categorical_cross entropy into categorical_cross_entropy with logits which is the same as softmax_nn with logits. This fixes some log instability that occurs when training for a long time.

10 samples works fairly well for orderless NADE w/o noise! only reached around 42% accuracy at 20000 epochs, but it just means the images are slightly blurry for right now.

Trying 10 samples with 2.5% noise! ~30000 epochs. Checkpoint: nade-cifar-color-double-softmax-num-samples-10-noise-upper-2.5-single.hdf5. Noticed that this model converges about half as fast as the model for grayscale.



In [0]:
# "Loss"
# Plot the training loss per epoch and, when a validation set was used
# (non-single runs), the validation loss alongside it.
plt.plot(history.history['loss'])
legend_labels = ['train']
if not is_single:
    plt.plot(history.history['val_loss'])
    # Label the second curve too; otherwise it shows up without a legend entry.
    legend_labels.append('validation')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(legend_labels, loc='upper left')
# plt.xscale('log')
plt.show()

# Inference

In [0]:
# testing model predict with seaborn and plots
# Reload the most recent checkpoint written by the training cell.
model = keras.models.load_model(model_location)
import seaborn as  sb
import matplotlib.pyplot as plt
print(image_shape)
def generate_noise():
    """Return an all-zero ``uint8`` batch of shape ``(1, *image_shape)``.

    Despite the name, the starting image is blank rather than random; the
    shape comes from the module-level ``image_shape``.
    """
    blank_image = np.zeros(image_shape, dtype=np.uint8)
    # Prepend the batch axis expected by model.predict.
    return blank_image[np.newaxis, ...]

# Run a single prediction on a blank image and visualize both outputs.
test_image = generate_noise()
softmax_predictions, sigmoid_predictions = model.predict(test_image)
# First output: one raw logit per pixel -> 2-D map.
softmax_predictions = softmax_predictions.reshape(image_shape[0], image_shape[1])
heatmap = sb.heatmap(softmax_predictions)
plt.show()
# Convert the logits to probabilities (softmax over all pixels).
# NOTE(review): unlike the inference code, the maximum is not subtracted
# before np.exp here, so large logits could overflow — confirm acceptable.
softmax_predictions = np.exp(softmax_predictions)
softmax_predictions = softmax_predictions / np.sum(softmax_predictions)
print(sigmoid_predictions.shape)
# print(sigmoid_predictions[])
# Second output: most likely intensity class per position.
sigmoid_predictions = np.argmax(sigmoid_predictions, axis=-1)
print(sigmoid_predictions.shape)
sigmoid_predictions = np.squeeze(sigmoid_predictions)
heatmap = sb.heatmap(softmax_predictions)
plt.show()
# The intensity map is only 2-D (and thus plottable) in grayscale mode.
if is_grayscale:
    heatmap = sb.heatmap(sigmoid_predictions)
    plt.show()



In [0]:
import os
import shutil
import time


def _sample_from_logits(logits, temperature=1.0):
    """Draw one index from softmax(``logits`` / ``temperature``).

    Returns ``(index, probabilities)`` for a 1-D logit vector.
    """
    # Subtract the maximum before exponentiating for numerical stability.
    shifted = logits - np.max(logits)
    probs = np.exp(shifted / temperature)
    probs = probs / np.sum(probs)
    return np.random.choice(probs.shape[0], p=probs), probs


def inference(model, input_image, directory, iterations, temp_start=2, temp_end=0.5, top_k=250, is_grayscale=True, is_debug=False):
    """Generate an image by rewriting one pixel per iteration.

    Each step samples a pixel position from the model's first output (with a
    temperature taken from a schedule running from ``temp_start`` to
    ``temp_end``) and then samples a new intensity for that pixel (per
    channel in colour mode) from the second output at temperature 1.
    Intermediate images are written to ``directory`` every 50 steps, plus
    'input.png', 'final_binary.png' and 'final.png'.

    Args:
        model: model with the two outputs described above.
        input_image: starting batch of shape ``(1, *image_shape)`` (module
            level ``image_shape``); not modified.
        directory: existing directory for the output images.
        iterations: number of single-pixel updates.
        temp_start: temperature of the first step.
        temp_end: temperature of the last step.
        top_k: currently unused (the top-k filtering is disabled).
        is_grayscale: selects the single-channel layout.
        is_debug: also show a heatmap of the position probabilities every
            50 steps.

    Returns:
        ``(img, final_image)``: the saved final PIL image and a copy of the
        final array.
    """
    create_image(input_image, os.path.join(directory, "input.png"),
                 image_shape=image_shape, is_grayscale=is_grayscale)

    # Temperature schedule: a geometric ramp whose first half is replaced by
    # its mirrored counterpart.
    # temperatures = np.linspace(temp_start, temp_end, num=iterations)
    temperatures = np.geomspace(temp_start, temp_end, num=iterations)
    temperatures_reverse = (temp_start + temp_end) - temperatures[::-1]
    half = temperatures.shape[0] // 2
    temperatures = np.concatenate((temperatures_reverse[:half], temperatures[half:]))

    working_image = deepcopy(input_image)
    num_added = 0
    num_removed = 0
    for i in range(iterations):
        position_logits, intensity_logits = model.predict(working_image)
        index, position_probs = _sample_from_logits(position_logits.flatten(), temperatures[i])

        if is_grayscale:
            intensity_logits = intensity_logits.reshape(-1, 256)
            working_image = working_image.flatten()
        else:
            intensity_logits = intensity_logits.reshape(-1, 3, 256)
            working_image = working_image.reshape(-1, 3)

        # Track whether the step overwrote an existing pixel or filled an
        # empty one.
        if np.sum(working_image[index]) != 0:
            num_removed += 1
        else:
            num_added += 1

        if is_grayscale:
            working_image[index], _ = _sample_from_logits(intensity_logits[index])
        else:
            for channel in range(3):
                working_image[index, channel], _ = _sample_from_logits(intensity_logits[index, channel])

        working_image = np.reshape(working_image, [1, *image_shape])
        if i % 50 == 0:
            if is_debug:
                print("softmax")
                heatmap = sb.heatmap(deepcopy(position_probs.reshape(image_shape[:-1])))
                plt.show()
            create_image(working_image, os.path.join(directory, "working_{}.png".format(i)),
                         image_shape=image_shape, is_grayscale=is_grayscale)

    final_image = working_image
    # Occupancy image: scaled to 0/255 so set pixels are actually visible.
    final_binary_image = deepcopy(final_image)
    final_binary_image[final_binary_image > 0] = 255
    create_image(final_binary_image, os.path.join(directory, "final_binary.png"),
                 image_shape=image_shape, is_grayscale=is_grayscale)

    print(final_image.shape)
    print("num added: {}. num fixed: {}".format(num_added, num_removed))
    img = create_image(final_image, os.path.join(directory, 'final.png'),
                       image_shape=image_shape, is_grayscale=is_grayscale)
    return img, deepcopy(final_image)

In [0]:
# model = keras.models.load_model(model_location)

drive_folder = '/content/drive/My Drive'

# Checkpoint path relative to the Drive folder, without the '.hdf5' suffix.
current_model_name = model_location.split('My Drive/')[-1].split('.hdf5')[0]

# Models to sample from; names are relative to drive_folder and must have an
# entry in `config` below.
model_names = [current_model_name,
               # 'sc-model-es-net-60000-4',
               #'sc-model-es-net-60000-16',
               # 'sc-model-es-net-mnist-grayscale-double-softmax',
               #'sc-model-nade-60000-4'
               # 'checkpoints/nade-cifar-color-double-softmax-0-single'
               # 'checkpoints/nade-cifar-color-double-softmax-num-samples-10-noise-upper-0-single'
               # 'checkpoints/nade-cifar-color-double-softmax-num-samples-1-noise-upper-0-single'
               # 'checkpoints/model_full_name'
               ]

# Per-model sampling settings forwarded to inference(): number of pixel
# updates, start/end temperature and top_k (which inference() currently
# does not use).
config = {
    'sc-model-es-net-60000-4': {
        "iterations": 300,
        "temp_start": 0.99,
        "temp_end": 0.99,
        "top_k": 10000
    },
    current_model_name: {
        "iterations": 1500,
        "temp_start": 1,
        "temp_end": 1,
        "top_k": 10000
    },
    'checkpoints/nade-cifar-color-double-softmax-num-samples-10-noise-upper-0-single': {
        "iterations": 1500,
        "temp_start": 1,
        "temp_end": 1,
        "top_k": 10000
    },
    'checkpoints/nade-cifar-color-double-softmax-num-samples-1-noise-upper-0-single': {
        "iterations": 1500,
        "temp_start": 1,
        "temp_end": 1,
        "top_k": 10000
    },
    'checkpoints/sc-model-es-net-cifar-color-double-softmax-1-single': {
        "iterations": 3000,
        "temp_start": 1,
        "temp_end": 1,
        "top_k": 10000
    },
    'checkpoints/model_full_name': {
        "iterations": 3000,
        "temp_start": 0.99,
        "temp_end": 0.99,
        "top_k" : 10000
    },

}

# For every configured model: generate sample_sqrt**2 images from a blank
# start image and save them as one square grid named after the model.
sample_sqrt = 3
# PIL sizes and offsets are (width, height); array shapes are
# (height, width, channels).
tile_height, tile_width = image_shape[0], image_shape[1]
for model_name in model_names:
    model = keras.models.load_model(os.path.join(drive_folder, model_name + '.hdf5'))
    # model = keras.models.load_model(F'/content/drive/My Drive/checkpoints/model_full_name.hdf5')
    model_config = config[model_name]
    generated_images = []
    for i in range(sample_sqrt**2):
        directory = "images_{}".format(i)
        os.makedirs(directory, exist_ok=True)
        input_image = generate_noise()
        # input_image = np.expand_dims(np.expand_dims(images[i], 0), -1)

        img, _ = inference(model, input_image, directory,
                           model_config['iterations'], temp_start=model_config['temp_start'],
                           temp_end=model_config['temp_end'], top_k=model_config['top_k'],
                           is_grayscale=is_grayscale, is_debug=False)
        generated_images.append(img)

    final_im = Image.new('RGB', (tile_width * sample_sqrt, tile_height * sample_sqrt))
    for row in range(sample_sqrt):
        for col in range(sample_sqrt):
            tile = generated_images[(row * sample_sqrt) + col]
            final_im.paste(tile, (col * tile_width, row * tile_height))

    # Save next to the notebook, named after the last path component.
    output_name = model_name.split('/')[-1]
    final_im.save(output_name + '.png')