In [1]:
# Import libraries and configure the notebook environment.
import keras
import numpy as np
import matplotlib.pyplot as plt
# Render matplotlib figures directly in the notebook output.
%matplotlib inline
import logging
# Disable the 'tensorflow' logger so its messages do not clutter cell output.
logging.getLogger('tensorflow').disabled = True
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, UpSampling2D, Reshape, Concatenate, Input
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
# Shared early-stopping callback: watches the total validation loss, stops after
# 10 epochs without improvement and restores the weights of the best epoch.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=2, patience=10, restore_best_weights=True)

In [2]:
# Fetch the MNIST digits and unpack the train / test splits.
train_split, test_split = keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
# Sanity check: image array shape and label array shape of the training split.
print(x_train.shape, y_train.shape)

(60000, 28, 28) (60000,)


In [3]:
# Normalize pixel intensities by 255. The guard makes the cell safe to re-run:
# once the data has been scaled its maximum is no longer > 1, so it is not
# divided a second time.
# The result is stored as float32 instead of NumPy's default float64: this
# halves the memory of the image arrays (and of the large paired copies built
# from them later) and matches the precision Keras feeds to the network.
if x_train.max() > 1:
    x_train = (x_train / 255).astype(np.float32)
    x_test = (x_test / 255).astype(np.float32)

default_shape = x_train.shape
# Reshape the input data to (samples, height, width, 1): a single channel axis.
x_train = x_train.reshape(-1, default_shape[1], default_shape[2], 1)
x_test = x_test.reshape(-1, default_shape[1], default_shape[2], 1)
# Per-image shape (height, width, channels), used as the model input shape.
image_dim = x_train.shape[1:]

In [4]:
# Load two separate copies of the saved encoder and freeze their weights.
# The path is a raw string: it contains Windows backslashes (\M, \w, \m, \e)
# that are not valid escape sequences and trigger warnings in a normal string.
# NOTE(review): this is an absolute, machine-specific path; adjust it (or make
# it configurable) before running the notebook elsewhere.
encoder_path = r'C:\Mounir\MyPython\work\mnist_autoencoder\encoder'

encoder1 = keras.models.load_model(encoder_path)
# Give each copy its own name (presumably so both can be used inside one
# model without a name clash; confirm against the Keras version in use).
encoder1._name = 'encoder1'
encoder1.trainable = False

encoder2 = keras.models.load_model(encoder_path)
encoder2._name = 'encoder2'
encoder2.trainable = False

In [5]:
# Assemble the joint model: each image goes through its own encoder copy, the
# two codes are merged into a shared trunk, and the trunk fans out into five
# task-specific heads (sum units / sum tens / product units / product tens /
# comparison).
input1, input2 = Input(shape=image_dim), Input(shape=image_dim)
enc1_out, enc2_out = encoder1(input1), encoder2(input2)

# Shared trunk: concatenated encoder outputs followed by one wide ReLU layer.
merged_codes = Concatenate()([enc1_out, enc2_out])
model_c = Dense(1000, activation='relu')(merged_codes)


def _hidden_branch(trunk):
    """Return a new 200-unit ReLU layer applied to ``trunk``."""
    return Dense(200, activation='relu')(trunk)


# One hidden branch per output head, created in head order.
# (An extra 100-unit layer per branch was tried and left disabled.)
model_b1, model_b2, model_b3, model_b4, model_b5 = [
    _hidden_branch(model_c) for _ in range(5)
]

# Addition heads: units digit as a 10-way softmax, tens digit as one sigmoid unit.
units_add = Dense(10, activation='softmax', name='units_add')(model_b1)
tens_add = Dense(1, activation='sigmoid', name='tens_add')(model_b2)

# Multiplication heads: units digit as a 10-way softmax, tens digit as a 9-way softmax.
units_mult = Dense(10, activation='softmax', name='units_mult')(model_b3)
tens_mult = Dense(9, activation='softmax', name='tens_mult')(model_b4)

# Comparison head: a single sigmoid unit.
comp = Dense(1, activation='sigmoid', name='comp')(model_b5)

# Pair every head with its loss so the two lists cannot drift out of order.
head_losses = [
    (units_add, 'categorical_crossentropy'),
    (tens_add, 'binary_crossentropy'),
    (units_mult, 'categorical_crossentropy'),
    (tens_mult, 'categorical_crossentropy'),
    (comp, 'binary_crossentropy'),
]

model_complete = Model(
    inputs=[input1, input2],
    outputs=[head for head, _ in head_losses],
)

model_complete.compile(
    optimizer='nadam',
    loss=[loss_name for _, loss_name in head_losses],
    metrics=['acc'],
)

In [6]:
# Build the paired datasets: every sample is a pair of digit images, with
# targets derived from the two digit labels (sum, product, comparison).

# Seeded local generator so the same pairs are drawn on every run.
data_rng = np.random.default_rng(42)

# One-hot widths are fixed explicitly so the targets always match the model
# heads, even if some class happens to be missing from a drawn sample.
n_unit_classes = 10        # units digit: 0..9
n_mult_tens_classes = 9    # tens digit of a product of two digits: 0..8 (9*9 = 81)

# generate a dataset for additions
train_size = 200000
# Pairs are drawn only from the first 25000 training images (value kept from
# the original notebook; the reason for this limit is not visible here).
train_pool_size = 25000
random_labels1 = data_rng.integers(0, train_pool_size, train_size)
random_labels2 = data_rng.integers(0, train_pool_size, train_size)

x_train_1 = x_train[random_labels1]
x_train_2 = x_train[random_labels2]

y_train_1 = y_train[random_labels1]
y_train_2 = y_train[random_labels2]

y_add = y_train_1 + y_train_2
y_add_tens = y_add // 10      # 0 or 1, used directly as a binary target
y_add_units = y_add % 10
y_add_units_cat = to_categorical(y_add_units, num_classes=n_unit_classes)


# the same with x_test
test_size = 5000
test_pool_size = len(x_test)  # every test image is eligible (upper bound is exclusive)
random_labels1 = data_rng.integers(0, test_pool_size, test_size)
random_labels2 = data_rng.integers(0, test_pool_size, test_size)

x_test_1 = x_test[random_labels1]
x_test_2 = x_test[random_labels2]

y_test_1 = y_test[random_labels1]
y_test_2 = y_test[random_labels2]

y_test_add = y_test_1 + y_test_2
y_test_add_tens = y_test_add // 10
y_test_add_units = y_test_add % 10
y_test_add_units_cat = to_categorical(y_test_add_units, num_classes=n_unit_classes)

# generate a dataset for multiplication

y_mult = y_train_1 * y_train_2
y_mult_tens = y_mult // 10
y_mult_units = y_mult % 10
y_mult_units_cat = to_categorical(y_mult_units, num_classes=n_unit_classes)
y_mult_tens_cat = to_categorical(y_mult_tens, num_classes=n_mult_tens_classes)

# the same with x_test

y_test_mult = y_test_1 * y_test_2
y_test_mult_tens = y_test_mult // 10
y_test_mult_units = y_test_mult % 10
y_test_mult_units_cat = to_categorical(y_test_mult_units, num_classes=n_unit_classes)
y_test_mult_tens_cat = to_categorical(y_test_mult_tens, num_classes=n_mult_tens_classes)

# generate a dataset for comparison: True when the first digit is strictly greater
y_comp = y_train_1 > y_train_2

# the same with x_test
y_test_comp = y_test_1 > y_test_2

In [7]:
# Train all five heads jointly; targets are listed in the same order as the
# model outputs. The early-stopping callback is registered exactly once:
# passing the same instance several times makes Keras call it several times
# per epoch, so its patience counter advances faster than the configured
# patience and training stops prematurely.
history = model_complete.fit(
    [x_train_1, x_train_2],
    [y_add_units_cat, y_add_tens, y_mult_units_cat, y_mult_tens_cat, y_comp],
    batch_size=100,
    epochs=1000,  # upper bound only; early stopping is expected to end training
    validation_split=0.2,
    verbose=2,
    callbacks=[es],
)

Epoch 1/1000
1600/1600 - 45s - loss: 2.5787 - units_add_loss: 0.9021 - tens_add_loss: 0.2324 - units_mult_loss: 0.6609 - tens_mult_loss: 0.5536 - comp_loss: 0.2297 - units_add_acc: 0.7070 - tens_add_acc: 0.9001 - units_mult_acc: 0.7769 - tens_mult_acc: 0.8067 - comp_acc: 0.9022 - val_loss: 1.2447 - val_units_add_loss: 0.4144 - val_tens_add_loss: 0.1246 - val_units_mult_loss: 0.3073 - val_tens_mult_loss: 0.2762 - val_comp_loss: 0.1222 - val_units_add_acc: 0.8749 - val_tens_add_acc: 0.9541 - val_units_mult_acc: 0.9050 - val_tens_mult_acc: 0.9100 - val_comp_acc: 0.9538
Epoch 2/1000
1600/1600 - 44s - loss: 0.9041 - units_add_loss: 0.2942 - tens_add_loss: 0.0919 - units_mult_loss: 0.2271 - tens_mult_loss: 0.1986 - comp_loss: 0.0923 - units_add_acc: 0.9120 - tens_add_acc: 0.9666 - units_mult_acc: 0.9297 - tens_mult_acc: 0.9361 - comp_acc: 0.9663 - val_loss: 0.7266 - val_units_add_loss: 0.2383 - val_tens_add_loss: 0.0701 - val_units_mult_loss: 0.1812 - val_tens_mult_loss: 0.1622 - val_comp_lo

In [None]:
# Score the trained model on the generated test pairs. Inputs and targets are
# listed in the same order as the model's inputs and outputs.
test_inputs = [x_test_1, x_test_2]
test_targets = [
    y_test_add_units_cat,
    y_test_add_tens,
    y_test_mult_units_cat,
    y_test_mult_tens_cat,
    y_test_comp,
]
test = model_complete.evaluate(test_inputs, test_targets, batch_size=100)

In [None]:
# Pick two random test images, show them, and print the model's predictions.
# randint's upper bound is exclusive, so using len(x_test) makes every test
# image (including the last one) selectable.
n_test_images = len(x_test)
random_label_1 = np.random.randint(0, n_test_images)
random_label_2 = np.random.randint(0, n_test_images)

# Add a leading batch axis of size 1 so each image can be fed to the model.
img_sample1 = x_test[random_label_1,:,:].reshape((1,28,28,1))
img_sample2 = x_test[random_label_2,:,:].reshape((1,28,28,1))

plt.subplot(1,2,1)
plt.imshow(img_sample1.reshape(28,28), cmap='gray');

plt.subplot(1,2,2)
plt.imshow(img_sample2.reshape(28,28), cmap='gray');

# The model returns one array per head, in the order the outputs were declared.
prediction = model_complete.predict([img_sample1,img_sample2])
unit_add = prediction[0]
ten_add = prediction[1]
unit_mult = prediction[2]
ten_mult = prediction[3]
comp_prob = prediction[4]

# The sigmoid heads return (1, 1) arrays; .item() extracts the scalar so the
# results print as plain integers instead of nested float arrays.
sum_images = int(np.argmax(unit_add)) + 10 * int(np.round(ten_add.item()))
print('sum =',sum_images)


mult_images = int(np.argmax(unit_mult)) + 10 * int(np.argmax(ten_mult))
print('multiplication result =',mult_images)

comp_result = int(np.round(comp_prob.item()))
print('comparison result =',comp_result,'1 if the number on the left is greater, 0 elsewhere')