# Manifold Mixup Keras
- Keras implementation of manifold mixup (mixup on hidden layer outputs rather than raw inputs)
- Keras makes it quite difficult to implement an elegant solution as it has quite a rigid structure
- The approach: mix the labels during batch generation, and pass the sampled lambda to the network as an extra input so that it can mix the hidden-layer outputs with the same coefficient
- To do the mixup in one place, I would have to create a custom loss layer that uses the mixed-up labels instead of the ones supplied by the batch (-_-)
- Haven't tinkered with the hyperparameters yet; at first glance, mixup seems to hurt performance
- TODO: tsne on embeddings w/o mixup to see if structure is indeed flat
- TODO: test to see if combination actually is bug free

In [33]:
import sys
import numpy as np

import keras
from keras import regularizers
from keras import backend as K
from keras.utils import np_utils
from keras.datasets import cifar10
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, Input

## Data

In [2]:
# Load CIFAR-10 and cast the images to float32 before normalising.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = x_train.astype('float32'), x_test.astype('float32')

# z-score: a single scalar mean/std is computed over every axis of the
# training images and applied to both splits (1e-7 guards against a zero std).
mean = np.mean(x_train, axis=(0, 1, 2, 3))
std = np.std(x_train, axis=(0, 1, 2, 3))
x_train = (x_train - mean) / (std + 1e-7)
x_test = (x_test - mean) / (std + 1e-7)

# One-hot encode the integer class labels of both splits.
num_classes = 10
y_train, y_test = (
    np_utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

## Model

In [34]:
# Build a three-input model: two image batches plus a mixing coefficient.
# Both images go through the same first conv block, the two block outputs are
# linearly combined (manifold mixup), and the mixed features continue through
# the second conv block and the softmax classifier.

# Mixing coefficient, one value per sample (see the note on the Mixup layer).
lambda_ = Input(shape=(1,))
# The two image inputs whose hidden representations get mixed.
input_1 = Input(shape=x_train.shape[1:])
input_2 = Input(shape=x_train.shape[1:])

###############################
# Block 1: (conv 32 -> ELU -> batch norm) x 2, max-pool, dropout 0.2.
# It is wrapped in its own Model so the very same layers can be applied to
# both image inputs below.

input_ = Input(shape=x_train.shape[1:])

block1 = Conv2D(32, (3,3), padding='same')(input_)
block1 = Activation('elu')(block1)
block1 = BatchNormalization()(block1)
block1 = Conv2D(32, (3,3), padding='same')(block1)
block1 = Activation('elu')(block1)
block1 = BatchNormalization()(block1)
block1 = MaxPooling2D()(block1)
block1 = Dropout(0.2)(block1)

block1 = Model(inputs=input_, outputs=block1, name="block1")


# Apply block 1 to each image input.
block1_out_1 = block1(input_1)
block1_out_2 = block1(input_2)


###############################
# Manifold Mixup

# lambda_ is conceptually a scalar, but it has to be encoded as a vector with
# the same length as the batch, otherwise Keras will complain.
# The layer receives [out_1, out_2, lambda_] and computes
#     lam * out_1 + (1 - lam) * out_2
# where lam = inputs[2][0], i.e. only the FIRST row of the lambda tensor is
# read and applied to every sample in the batch.
layer_mixup = keras.layers.Lambda(lambda inputs: inputs[2][0] * inputs[0] +  (1 - inputs[2][0]) * inputs[1], name="Mixup")
block1_out = layer_mixup([block1_out_1, block1_out_2, lambda_])


###############################
# Block 2: (conv 64 -> ELU -> batch norm) x 2, max-pool, dropout 0.3.
# Its input shape is taken from the mixed tensor with the leading dimension
# dropped.
# NOTE(review): `.as_list()` presumably relies on a TensorFlow TensorShape,
# i.e. the TensorFlow backend -- confirm before switching backends.
block2_in = Input(shape=block1_out.shape[1:].as_list())

block2 = Conv2D(64, (3,3), padding='same')(block2_in)
block2 = Activation('elu')(block2)
block2 = BatchNormalization()(block2)
block2 = Conv2D(64, (3,3), padding='same')(block2)
block2 = Activation('elu')(block2)
block2 = BatchNormalization()(block2)
block2 = MaxPooling2D()(block2)
block2 = Dropout(0.3)(block2)

block2 = Model(inputs=block2_in, outputs=block2, name="block2")
block2_out = block2(block1_out)

###############################
# Classifier head: flatten the block 2 features and map them to
# `num_classes` softmax probabilities.

final_in = Input(shape=block2_out.shape[1:].as_list())

final = Flatten()(final_in)
final = Dense(num_classes, activation='softmax')(final)

final = Model(inputs=final_in, outputs=final, name="final")
final_out = final(block2_out)


###############################
# Full model. The input order [image_1, image_2, lambda] must match the order
# of the list fed to it at fit/evaluate time.


model = Model(inputs=[input_1, input_2, lambda_], outputs=final_out)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_22 (InputLayer)           (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
input_23 (InputLayer)           (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
block1 (Model)                  (None, 16, 16, 32)   10400       input_22[0][0]                   
                                                                 input_23[0][0]                   
__________________________________________________________________________________________________
input_21 (InputLayer)           (None, 1)            0                                            
__________

## Training

In [29]:
def mixup_preproc(batch_X, batch_y, alpha=1., dummy=False):
    """Turn one (inputs, labels) batch into the three-input form the model expects.

    Args:
        batch_X: Array of inputs, indexed along its first axis.
        batch_y: Array of labels aligned with ``batch_X``.
        alpha: Parameter of the symmetric ``Beta(alpha, alpha)`` distribution
            the mixing coefficient is drawn from. When ``alpha <= 0`` the
            coefficient is fixed to 1 (the shuffled partner gets zero weight).
        dummy: When true, skip mixing altogether: the batch is paired with
            itself, the coefficient vector is all ones and the labels are
            returned untouched.

    Returns:
        A pair ``([batch_X, partner_X, coeffs], labels)`` where ``partner_X``
        is ``batch_X`` in shuffled order, ``coeffs`` repeats the single
        sampled coefficient once per sample, and ``labels`` is the convex
        combination of the original and the shuffled labels using that
        coefficient.
    """
    n_samples = len(batch_X)

    if dummy:
        return [batch_X, batch_X, np.ones(n_samples)], batch_y

    # Pair every sample with a randomly chosen partner from the same batch.
    order = list(range(n_samples))
    np.random.shuffle(order)  # in-place
    partner_X, partner_y = batch_X[order], batch_y[order]

    # One coefficient is sampled per batch, not per sample.
    mix = np.random.beta(alpha, alpha) if alpha > 0 else 1

    mixed_y = mix * batch_y + (1 - mix) * partner_y
    # The coefficient is replicated so that it has one entry per sample.
    coeffs = np.array([mix] * n_samples)
    return [batch_X, partner_X, coeffs], mixed_y

def mixup_preproc_gen(generator, dummy=False):
    """Wrap a batch generator so every batch goes through ``mixup_preproc``.

    Args:
        generator: Iterable yielding tuples that are unpacked as the
            positional arguments of ``mixup_preproc`` (normally
            ``(batch_X, batch_y)``).
        dummy: Forwarded to ``mixup_preproc``; when true no mixing is done.

    Yields:
        The ``([X, shuffled_X, lambda], y)`` pair produced by
        ``mixup_preproc`` for each incoming batch.
    """
    # Iterate with ``for`` rather than ``while True: next(generator)``: under
    # PEP 479 a StopIteration escaping from ``next()`` inside a generator body
    # becomes a RuntimeError, so a finite source used to crash on exhaustion.
    # An endless source behaves exactly as before.
    for batch in generator:
        yield mixup_preproc(*batch, dummy=dummy)
    

def train(mixup: bool = False, n_epochs: int = 10):
    """Compile and fit the module-level ``model`` on augmented training batches.

    The function works on the global ``model`` object in place: it is
    recompiled with a fresh RMSprop optimizer, fitted, and then returned.
    No new model is built here, so fitting continues from whatever weights
    ``model`` holds when the function is called.

    Args:
        mixup: When true, batches are mixed by ``mixup_preproc``; when false
            the same three-input format is produced with mixing disabled.
        n_epochs: Number of epochs to fit for.

    Returns:
        The module-level ``model`` after fitting.
    """
    batch_size = 64
    steps = x_train.shape[0] // batch_size

    # data augmentation
    augmenter = ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
    )
    augmenter.fit(x_train)

    # Augmented batches, converted to the [X, X', lambda] input format.
    batches = mixup_preproc_gen(
        augmenter.flow(x_train, y_train, batch_size=batch_size),
        dummy=not mixup,
    )

    # Validation feeds each test image as both inputs with lambda = 1.
    val_inputs = [x_test, x_test, np.ones(len(x_test))]

    optimizer = keras.optimizers.rmsprop(lr=0.001, decay=1e-6)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    model.fit_generator(
        batches,
        steps_per_epoch=steps,
        epochs=n_epochs,
        verbose=1,
        validation_data=(val_inputs, y_test),
    )
    return model

### 1. Benchmark

In [30]:
# Baseline run: same network and input format, but mixing disabled.
benchmark = train(mixup=False, n_epochs=10)
# Evaluate with each test image fed as both inputs and lambda = 1.
scores = benchmark.evaluate(
    [x_test, x_test, np.ones(len(x_test))],
    y_test,
    batch_size=128,
    verbose=1,
)
# scores[0] is the loss, scores[1] the accuracy metric set in compile().
print('\nTest result: %.3f loss: %.3f' % (scores[1] * 100, scores[0]))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test result: 79.950 loss: 0.601


### 2. Mixup

In [32]:
# Mixup run. train() compiles, fits and returns the module-level `model`,
# so this run starts from whatever weights that object currently holds.
mixup = train(mixup=True, n_epochs=10)
# Evaluate with each test image fed as both inputs and lambda = 1.
scores = mixup.evaluate(
    [x_test, x_test, np.ones(len(x_test))],
    y_test,
    batch_size=128,
    verbose=1,
)
# scores[0] is the loss, scores[1] the accuracy metric set in compile().
print('\nTest result: %.3f loss: %.3f' % (scores[1] * 100, scores[0]))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Test result: 75.360 loss: 0.764
