# Simple 2D convolutions

A simple 2D convolution implementation.

In [10]:
# Simple implementation of convolution of different types in tensorflow.
import tensorflow as tf # ML
import numpy as np # matrix maths

## Making toy data

We create simple toy data. Each sample is a 10x10 image with a single float value (one channel) per pixel, whereas an RGB image would have 3 values per pixel.

For the output we have 3 classes; each sample is assigned one class at random and the label is stored as a one-hot vector.

In [2]:
# Configuration for the toy data set and the training run.

# -- image geometry --
x_dim = 10 # number of columns (width) of each sample matrix
y_dim = 10 # number of rows (height) of each sample matrix

# -- range the matrix entries are drawn from --
min_val = 0 # lower bound of the entries (inclusive)
max_val = 30 # upper bound of the entries (used as the exclusive `high` of randint)

# -- data set size --
samples = 100 # how many samples to generate
classes = 3 # how many target classes exist

# -- training --
n_epochs = 100 # how many epochs to train for

In [3]:
# Toy data is drawn from a dedicated, seeded generator so that a fresh
# "Restart & Run All" reproduces exactly the same samples and labels.
seed = 42 # fixed seed for the toy data
rng = np.random.RandomState(seed)

# making input data
# integer-valued entries in [min_val, max_val), stored as float32 with a
# trailing channel axis of size 1
data_x = rng.randint(low = min_val, high = max_val, size = (samples, y_dim, x_dim, 1)).astype(np.float32)
print('Shape of input data:', data_x.shape[1:])

# making output data
# one random class index per sample, one-hot encoded without a Python loop;
# float32 matches the dtype of the placeholder the labels are fed into
labels = rng.randint(classes, size = samples)
data_y = np.zeros(shape = (samples, classes), dtype = np.float32)
data_y[np.arange(samples), labels] = 1.0
print('Shape of output data:', data_y.shape[1:])

Shape of input data: (10, 10, 1)
Shape of output data: (3,)


### Making a simple 2D convolutional model

Input --> Convolution --> Max pooling --> Dense --> Output --> Loss <-- Actual output

In [4]:
# Graph inputs. The leading None leaves the batch size open, so any number
# of samples can be fed at once.
# x: batch of single-channel images, shape (batch, height, width, 1)
x = tf.placeholder(dtype = tf.float32, shape = [None, y_dim, x_dim, 1], name = 'x')
# y: batch of target vectors, one entry per class
y = tf.placeholder(dtype = tf.float32, shape = [None, classes], name = 'y')

In [5]:
# Convolution API used below:
#   tf.nn.conv2d(input, filter, strides, padding, ...)
#   input   = input to the convolution layer
#   filter  = weights of the convolution layer,
#             shaped [filter_height, filter_width, in_channels, out_channels]
#   strides = list of 4 ints, one stride per dimension of the input
#             (which entry is which depends on the data format)
#   padding = "SAME" or "VALID"

n_filters = 16 # number of features the convolution computes per window

# ___Convolution Layer___
# defining the filter a.k.a weights
# [4, 4, 1, n_filters] --> for every 4x4 window of the single input channel
# the convolution layer computes n_filters features
W_conv = tf.Variable(tf.truncated_normal([4, 4, 1, n_filters]))
# defining biases for the layer (one per feature)
b_conv = tf.Variable(tf.truncated_normal([n_filters]))
# doing the convolution with stride 1; 'SAME' padding keeps the spatial size
conv_out = tf.nn.conv2d(x, W_conv, [1, 1, 1, 1], padding = 'SAME') + b_conv
# 2x2 max pooling with stride 2 halves height and width (rounded up with 'SAME')
h_conv = tf.nn.max_pool(conv_out, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')

# For the default 10x10 input the output of max_pool is an image of size 5x5,
# each [pixel, for lack of a better word] having n_filters features, exactly
# like each pixel of an RGB image has 3 features.

# Number of values per sample after pooling, read from the static shape of the
# pooled tensor instead of hard-coding 5*5*16, so the model keeps working when
# x_dim, y_dim or n_filters change.
flat_dim = int(np.prod(h_conv.get_shape().as_list()[1:]))

# ___Densely Connected Layer___
W_dense = tf.Variable(tf.truncated_normal([flat_dim, classes]))
b_dense = tf.Variable(tf.truncated_normal([classes]))
# we need to flatten the output of convolution layer so that we can feed it to a Dense Layer
h_conv_flat = tf.reshape(h_conv, [-1, flat_dim])
# y_ holds the unnormalised class scores (logits), one row per sample
y_ = tf.matmul(h_conv_flat, W_dense) + b_dense

In [6]:
# Determining the loss function and accuracy function

# softmax_cross_entropy_with_logits returns one loss value per sample; average
# them so the optimised quantity is a scalar that does not grow with the
# number of samples fed in (minimising the raw vector would implicitly
# minimise the sum of its entries).
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels = y, logits = y_))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

# a sample counts as correct when the highest-scoring class equals the
# position of the 1 in its one-hot label
correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1))
# fraction of correctly classified samples
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [9]:
# Now we train the model
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Full-batch training: every step, and every accuracy check, uses the whole
# data set, so the feed is built once up front.
feed_dict = {x: data_x, y: data_y}
report_every = 10 # print the accuracy once per this many epochs

for epoch in range(1, n_epochs + 1):
    # one optimiser update on the full data set
    sess.run(train_step, feed_dict = feed_dict)
    if epoch % report_every == 0:
        # accuracy is measured on the same data the model is trained on
        acc = sess.run(accuracy, feed_dict = feed_dict)
        print('Epoch {0}, accuracy : {1}'.format(epoch, acc))

Epoch 10, accuracy : 0.25999999046325684
Epoch 20, accuracy : 0.33000001311302185
Epoch 30, accuracy : 0.3199999928474426
Epoch 40, accuracy : 0.30000001192092896
Epoch 50, accuracy : 0.3100000023841858
Epoch 60, accuracy : 0.3100000023841858
Epoch 70, accuracy : 0.30000001192092896
Epoch 80, accuracy : 0.3199999928474426
Epoch 90, accuracy : 0.33000001311302185
Epoch 100, accuracy : 0.33000001311302185


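As we can see, the accuracy does not improve steadily: it fluctuates between roughly 0.26 and 0.33, which is chance level for 3 classes. This is expected, because the labels were assigned at random and there is no pattern for the model to learn. The run only demonstrates that the convolutional pipeline executes end to end.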