# Making Your Model Learn Addition!
## Task 1: Introduction

Given the string "54+7", the model should return a prediction: "61".

In [1]:
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Dense, Dropout, SimpleRNN, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback

from termcolor import colored

## Task 2: Generate Data
___
Note: If you are starting the notebook from this task, you can run cells from all previous tasks in the kernel by going to the top menu and then selecting Kernel > Restart and Run All
___

In [2]:
all_chars = '0123456789+'

In [3]:
num_features = len(all_chars)
print("Number of features : ", num_features)

# tokenized representation of characters.
char_to_index = dict((c, i) for i, c in enumerate(all_chars))
index_to_char = dict((i, c) for i, c in enumerate(all_chars))

Number of features :  11


In [5]:
def generate_data():
    first_num = np.random.randint(0, 100)
    second_num = np.random.randint(0, 100)
    
    example = str(first_num) + '+' + str(second_num)
    label = str(first_num + second_num)
    
    return example, label
generate_data()

('34+91', '125')

## Task 3: Create the Model
___
Note: If you are starting the notebook from this task, you can run cells from all previous tasks in the kernel by going to the top menu and then selecting Kernel > Restart and Run All
___
Consider these two reviews:

Review 1: This movie is not terrible at all.

Review 2: This movie is pretty decent.

In [6]:
hidden_units = 128
max_time_steps = 5 # len(99+99) = 5

model =Sequential()
# encoder
model.add(SimpleRNN(hidden_units, input_shape = (None, num_features)))
# create vector representation of entire input sequence.
model.add(RepeatVector(max_time_steps))

# decoder
model.add(SimpleRNN(hidden_units, return_sequences = True))
# apply dense layer to diff time steps and the hidden states are diff for diff time steps
model.add(TimeDistributed(Dense(num_features, activation = 'softmax')))

In [7]:
# compile the model
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

# summary of the model
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 128)               17920     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 128)            0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5, 128)            32896     
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 11)             1419      
Total params: 52,235
Trainable params: 52,235
Non-trainable params: 0
_________________________________________________________________


## Task 4: Vectorize and De-Vectorize Data
___
Note: If you are starting the notebook from this task, you can run cells from all previous tasks in the kernel by going to the top menu and then selecting Kernel > Restart and Run All
___

In [11]:
def vectorize_example(example, label):
    x = np.zeros((max_time_steps, num_features))
    y = np.zeros((max_time_steps, num_features))
    
    diff_x = max_time_steps - len(example)
    diff_y = max_time_steps - len(label)
    
    for i, c in enumerate(example):
        x[i + diff_x, char_to_index[c]] = 1
    # pad 0 to the left.    
    for i in range(diff_x):
        x[i, char_to_index['0']] = 1
    
    for i, c in enumerate(label):
        y[i + diff_y, char_to_index[c]] = 1
    
    # pad 0 to the left
    for i in range(diff_y):
        y[i, char_to_index['0']] = 1
    
    return x, y
e, l = generate_data()
print("Example : ", e)
print("Label : ", l)

x, y = vectorize_example(e, l)
print("Shape : ", x.shape)
print(x)
print("------------")
print(y)


Example :  88+57
Label :  145
Shape :  (5, 11)
[[0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]
------------
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [14]:
print(np.argmax(x, axis = 1)) # here 10 represents '+' operator
print(np.argmax(y, axis = 1))

[ 8  8 10  5  7]
[0 0 1 4 5]


In [17]:
def devectorize_example(example):
    result = [index_to_char[np.argmax(vec)] for i, vec in enumerate(example)]
    return ''.join(result)

'88+57'

In [18]:
devectorize_example(x)

'88+57'

In [20]:
devectorize_example(y)

'00145'

## Task 5: Create Dataset
___
Note: If you are starting the notebook from this task, you can run cells from all previous tasks in the kernel by going to the top menu and then selecting Kernel > Restart and Run All
___

In [22]:
def create_dataset(num_examples = 2000):
    x = np.zeros((num_examples, max_time_steps, num_features))
    y = np.zeros((num_examples, max_time_steps, num_features))
    
    for i in range(num_examples):
        e, l = generate_data()
        e_vec, l_vec = vectorize_example(e, l)
        x[i], y[i] = e_vec, l_vec
    
    return x, y

x, y = create_dataset()
print(x.shape)
print(y.shape)

(2000, 5, 11)
(2000, 5, 11)


In [25]:
print("x : ", devectorize_example(x[0]))

x :  98+27


In [26]:
print("y : ", devectorize_example(y[0]))

y :  00125


## Task 6: Training the Model
___
Note: If you are starting the notebook from this task, you can run cells from all previous tasks in the kernel by going to the top menu and then selecting Kernel > Restart and Run All
___

In [28]:
# defining the callbacks
l_cb = LambdaCallback(
    on_epoch_end = lambda e, l : print('{:.2f}'.format(l['val_acc']), end = " _ ")
)

es_cb = EarlyStopping(monitor = 'val_loss', patience = 10)

# fit the model on the training data
model.fit(x, y, epochs = 500, batch_size = 256, validation_split = 0.2, verbose = False, callbacks = [es_cb, l_cb])

0.54 _ 0.59 _ 0.59 _ 0.60 _ 0.61 _ 0.62 _ 0.61 _ 0.62 _ 0.63 _ 0.64 _ 0.64 _ 0.64 _ 0.66 _ 0.64 _ 0.64 _ 0.65 _ 0.65 _ 0.66 _ 0.67 _ 0.67 _ 0.67 _ 0.69 _ 0.70 _ 0.69 _ 0.69 _ 0.71 _ 0.70 _ 0.71 _ 0.72 _ 0.73 _ 0.72 _ 0.72 _ 0.74 _ 0.73 _ 0.73 _ 0.75 _ 0.75 _ 0.75 _ 0.76 _ 0.76 _ 0.76 _ 0.77 _ 0.77 _ 0.77 _ 0.77 _ 0.76 _ 0.79 _ 0.77 _ 0.78 _ 0.80 _ 0.79 _ 0.81 _ 0.80 _ 0.82 _ 0.82 _ 0.83 _ 0.84 _ 0.84 _ 0.84 _ 0.85 _ 0.85 _ 0.85 _ 0.84 _ 0.84 _ 0.84 _ 0.86 _ 0.87 _ 0.88 _ 0.87 _ 0.89 _ 0.88 _ 0.89 _ 0.89 _ 0.89 _ 0.89 _ 0.90 _ 0.90 _ 0.90 _ 0.91 _ 0.91 _ 0.91 _ 0.91 _ 0.91 _ 0.91 _ 0.91 _ 0.92 _ 0.92 _ 0.92 _ 0.91 _ 0.93 _ 0.92 _ 0.92 _ 0.92 _ 0.92 _ 0.92 _ 0.92 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.92 _ 0.93 _ 0.93 _ 0.92 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.93 _ 0.94 _ 0.93 _ 0.93 _ 0.94 _ 0.93 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.93 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.94 _ 0.95 _ 0.95 _ 0.94 _ 0.94 _ 0.95 _ 0.94 _ 0.94 _ 0.95 _ 0.95 _ 0.95 _

<tensorflow.python.keras.callbacks.History at 0x254694c83c8>

In [34]:
# predicting on unseen data
x_test, y_test = create_dataset(num_examples = 15)
preds = model.predict(x_test)

for i, pred in enumerate(preds):
    y = devectorize_example(y_test[i])
    y_hat = devectorize_example(pred)
    col = 'green'
    
    if y != y_hat:
        colr = 'red'
    out = 'Input : ' + devectorize_example(x_test[i]) + ' Actual : ' + y + ' Prediction : ' + y_hat
    print(colored(out, col))

[32mInput : 33+45 Actual : 00078 Prediction : 00078[0m
[32mInput : 41+41 Actual : 00082 Prediction : 00082[0m
[32mInput : 08+96 Actual : 00104 Prediction : 00104[0m
[32mInput : 98+29 Actual : 00127 Prediction : 00127[0m
[32mInput : 92+56 Actual : 00148 Prediction : 00148[0m
[32mInput : 25+17 Actual : 00042 Prediction : 00042[0m
[32mInput : 04+57 Actual : 00061 Prediction : 00051[0m
[32mInput : 74+27 Actual : 00101 Prediction : 00101[0m
[32mInput : 22+59 Actual : 00081 Prediction : 00081[0m
[32mInput : 10+35 Actual : 00045 Prediction : 00045[0m
[32mInput : 22+11 Actual : 00033 Prediction : 00033[0m
[32mInput : 76+90 Actual : 00166 Prediction : 00166[0m
[32mInput : 010+4 Actual : 00014 Prediction : 00013[0m
[32mInput : 36+64 Actual : 00100 Prediction : 00100[0m
[32mInput : 44+94 Actual : 00138 Prediction : 00138[0m
