# Four Arithmetic Deep Neural Network  
Challenge training four arithmetic operation with deep neural network.  
This NN compute within three-digits number without zero.  

## Create Dataset

In [1]:
import numpy as np

X_train = np.random.randint(1, 100, size=(1000000, 2))
X_test = np.random.randint(1, 100, size=(10000, 2))
y_train_add = np.sum(X_train, axis=1)
y_test_add = np.sum(X_test, axis=1)
y_train_sub = np.subtract(X_train[:, 0], X_train[:, 1])
y_test_sub = np.subtract(X_test[:, 0], X_test[:, 1])
y_train_mul = np.multiply(X_train[:, 0], X_train[:, 1])
y_test_mul = np.multiply(X_test[:, 0], X_test[:, 1])
y_train_div = np.divide(X_train[:, 0], X_train[:, 1])
y_test_div = np.divide(X_test[:, 0], X_test[:, 1])
train_data = np.c_[X_train, y_train_add, y_train_sub, y_train_mul, y_train_div]
test_data = np.c_[X_test, y_test_add, y_test_sub, y_test_mul, y_test_div]
y_train, y_test = train_data[:, 2:], test_data[:, 2:]

## fetch data function

In [2]:
def fetch_data(X, y, batch_size):
    n, _ = X.shape
    batch_index = np.random.randint(0, n, size=batch_size)
    X_batch, y_batch = X[batch_index], y[batch_index]
    return X_batch, y_batch

In [3]:
import tensorflow as tf

## Constract branched DNN  
Branched DNN architecture image below. (Maybe this problem do not need to be branched)

<img src="four_arithmetic_branched_DNN_image.png">

In [4]:
n_inputs = 2
n_hidden1 = 16
n_hidden_add = 32
n_hidden_sub = 32
n_hidden_mul = 32
n_hidden_div = 46
n_hidden2 = 64
n_outputs = 4
dropout_rate = 0.4

In [5]:
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, 2), name='X')
y = tf.placeholder(tf.float32, shape=(None, 4), name='y')
with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.leaky_relu, name='hidden1')
    add = tf.layers.dense(hidden1, n_hidden_add, activation=tf.nn.leaky_relu, name='add')    
    sub = tf.layers.dense(hidden1, n_hidden_sub, activation=tf.nn.leaky_relu, name='sub')
    mul = tf.layers.dense(hidden1, n_hidden_mul, activation=tf.nn.leaky_relu, name='mul')
    div = tf.layers.dense(hidden1, n_hidden_div, activation=tf.nn.leaky_relu, name='div')
    four_concat = tf.concat([add, sub, mul, div], 1)
    hidden2 = tf.layers.dense(four_concat, n_hidden2, activation=tf.nn.leaky_relu, name='hidden2')
    output = tf.layers.dense(hidden2, n_outputs, name='output')

with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.square(output - y), name='loss')
    
with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    mse = tf.reduce_mean(tf.square(output - y), name='mse')

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [6]:
mse_summary = tf.summary.scalar('MSE', mse)
logdir = 'tf_logs/four_arithmetic_branched_DNN'
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

### Train DNN

In [8]:
n_epochs = 2000
batch_size = 20000
n_batchies = int(X_train.shape[0] // batch_size)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for batch_cnt in range(n_batchies):
            X_batch, y_batch = fetch_data(X_train, y_train, batch_size=batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 50 == 0:
            MSE = mse_summary.eval(feed_dict={X: X_train, y: y_train})
            step = epoch * n_batchies + batch_cnt
            file_writer.add_summary(MSE, step)
    test_MSE = mse.eval(feed_dict={X: X_test, y: y_test})
    print('test MSE: ', test_MSE)
    saver.save(sess, 'my_models/four_arithmetic_branched_DNN')
file_writer.close()

test MSE:  51.00937


### Test trained DNN

In [10]:
with tf.Session() as sess:
    saver.restore(sess, 'my_models/four_arithmetic_branched_DNN')
    y_pred = output.eval(session=sess, feed_dict={X: X_test})
    pred_set = np.float16(np.c_[X_test, y_pred])
    print(pred_set)

INFO:tensorflow:Restoring parameters from my_models/four_arithmetic_branched_DNN
[[ 1.5000e+01  9.2000e+01  1.0706e+02 -7.7062e+01  1.3880e+03 -7.2363e-01]
 [ 7.9000e+01  2.4000e+01  1.0294e+02  5.5188e+01  1.9060e+03  3.3711e+00]
 [ 6.4000e+01  7.9000e+01  1.4262e+02 -1.4875e+01  5.0360e+03  1.2441e+00]
 ...
 [ 3.0000e+00  3.7000e+01  4.0125e+01 -3.3438e+01  1.2050e+02 -7.5098e-01]
 [ 4.5000e+01  8.9000e+01  1.3388e+02 -4.3938e+01  3.9940e+03  6.2695e-01]
 [ 2.8000e+01  4.5000e+01  7.2875e+01 -1.6500e+01  1.2660e+03  1.6396e+00]]


In [11]:
print(y_test)

[[ 1.07000000e+02 -7.70000000e+01  1.38000000e+03  1.63043478e-01]
 [ 1.03000000e+02  5.50000000e+01  1.89600000e+03  3.29166667e+00]
 [ 1.43000000e+02 -1.50000000e+01  5.05600000e+03  8.10126582e-01]
 ...
 [ 4.00000000e+01 -3.40000000e+01  1.11000000e+02  8.10810811e-02]
 [ 1.34000000e+02 -4.40000000e+01  4.00500000e+03  5.05617978e-01]
 [ 7.30000000e+01 -1.70000000e+01  1.26000000e+03  6.22222222e-01]]


## Constract non branched DNN
non branched architecture image below

<img src=''>

<img src='four_arithmetic_non_branched_DNN_image.png'>

In [12]:
n_inputs = 2
n_hidden1 = 16

n_hidden_add = 32
n_hidden_sub = 32
n_hidden_mul = 32
n_hidden_div = 46

n_hidden2 = n_hidden_add + n_hidden_sub + n_hidden_mul + n_hidden_div
n_hidden3 = 64
n_outputs = 4

In [13]:
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, 2), name='X')
y = tf.placeholder(tf.float32, shape=(None, 4), name='y')

with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.leaky_relu, name='hidden1')
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.leaky_relu, name='hidden2')    
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.leaky_relu, name='hidden3')
    output = tf.layers.dense(hidden3, n_outputs, name='output')

with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.square(output - y), name='loss')
    
with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    mse = tf.reduce_mean(tf.square(output - y), name='mse')

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [14]:
mse_summary = tf.summary.scalar('MSE', mse)
logdir = 'tf_logs/four_arithmetic_non_branched_DNN'
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())

### Train DNN

In [16]:
n_epochs = 2000
batch_size = 20000
n_batchies = int(X_train.shape[0] // batch_size)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for batch_cnt in range(n_batchies):
            X_batch, y_batch = fetch_data(X_train, y_train, batch_size=batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        if epoch % 50 == 0:
            MSE = mse_summary.eval(feed_dict={X: X_train, y: y_train})
            step = epoch * n_batchies + batch_cnt
            file_writer.add_summary(MSE, step)
    test_MSE = mse.eval(feed_dict={X: X_test, y: y_test})
    print('test MSE: ', test_MSE)
    saver.save(sess, 'my_models/four_arithmetic_non_branched_DNN')
file_writer.close()

test MSE:  25.332294


## Summary  
<img src='four_arithmetic_MSE_image.png'>
Maybe there is no difference between branched to non-branched deep neural network.(I did not do many validate. So do not trust.)  
Divide is difficult ;(  
Maybe other computings are good.

test concat

In [None]:
tf.reset_default_graph()
init = tf.global_variables_initializer()

a = tf.constant([[1,2,3], [4,5,6]], dtype=tf.int16)
b = tf.constant([[7,8,9], [10,11,12]], dtype=tf.int16)
c = tf.constant([[13,14,15], [16,17,18]], dtype=tf.int16)
d = tf.constant([[19,20,21], [22,23,24]], dtype=tf.int16)
concat = tf.concat([a,b,c,d], 1)
with tf.Session() as sess:
    init.run()
    con_con = concat.eval()
    print(con_con)