# About OCR approach1:
With the ocr1.py script we aim to train a small Convolutional Neural Network (CNN) on the data generated by random_string_data_gen.py. The network should be able to recognize the random string in a given image and provide it as output.  
In the first phase we test it on the generated data itself; later we crop some images from our screen that meet the dataset constraints and see how well the network works.  
Link to blog: https://medium.com/@vijendra1125/ocr-part-2-ocr-using-cnn-f43f0cee8016

# **Load Libraries**

In [None]:
import os
from datetime import datetime as dt
import string
import numpy as np
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf

# Settings

In [None]:
import sys  # sys.maxsize is used below; the import cell above does not bring sys in

# print numpy arrays in full: the summarization threshold is set to the largest int
np.set_printoptions(threshold=sys.maxsize)
# ANSI escape sequences wrapped around headings printed by the run cells
bold = '\033[1m'
end = '\033[0m'

# Train

#### Functions

In [None]:
def read_data(file_paths):
  '''
  @brief: build the graph ops that read one serialized example from the
          given tfrecord files and decode its two byte-string fields
  @args[in]:
    file_paths: list of path to tfrecord files
  @args[out]:
    image: uint8 tensor decoded from the 'images' feature of the record
    label: uint8 tensor decoded from the 'labels' feature of the record
  '''
  # queue of file names consumed by the record reader
  filename_queue = tf.train.string_input_producer(file_paths)
  record_reader = tf.TFRecordReader()
  _, serialized_example = record_reader.read(filename_queue)
  # both fields are stored as raw byte strings
  schema = {
      'images': tf.FixedLenFeature([], tf.string),
      'labels': tf.FixedLenFeature([], tf.string),
  }
  parsed = tf.parse_single_example(serialized_example, features=schema)
  image = tf.decode_raw(parsed['images'], tf.uint8)
  label = tf.decode_raw(parsed['labels'], tf.uint8)
  return image, label


def minibatch(batch_size, 
              file_paths, 
              image_size, 
              string_length, 
              class_count):
  '''
  @brief: create a shuffled minibatch of data (image and one-hot label)
  @args[in]:
    batch_size: size of the minibatch
    file_paths: list of path to the tfrecord files
    image_size: size of the image (row, columns, channels)
    string_length: length of label string (including whitespace)
    class_count: total number of classes
  @args[out]:
    image_batch: batch of float32 images, each reshaped to image_size
    label_batch: batch of int64 one-hot labels, each of shape
                 [class_count, string_length]
  @note: the shuffle queue is configured from the module-level variables
         capacity, min_after_dequeue and num_of_threads
  ''' 
  raw_image, raw_label = read_data(file_paths)

  # image: flat bytes -> image_size -> float32
  shaped_image = tf.reshape(raw_image, image_size)
  image = tf.cast(shaped_image, dtype=tf.float32)

  # label: one-hot along axis 1 gives [1, class_count, string_length];
  # the second reshape drops the leading dimension of size 1
  label_row = tf.reshape(raw_label, [1, string_length])
  one_hot_label = tf.one_hot(label_row, class_count, axis=1)
  label = tf.reshape(one_hot_label, tf.shape(one_hot_label)[1:])

  image_batch, label_batch = tf.train.shuffle_batch(
      [image, label],
      batch_size,
      capacity,
      min_after_dequeue,
      num_threads=num_of_threads)
  label_batch = tf.cast(label_batch, dtype=tf.int64)
  return image_batch, label_batch


def variable(name, shape, initializer, weight_decay = None):
  '''
  @brief: create (or fetch) a parameter tensor in the current variable scope
  @args[in]:
    name: variable name
    shape: variable shape
    initializer: initializer passed to tf.get_variable
    weight_decay: optional multiplier; when given, weight_decay * l2_loss(var)
                  is added to the 'losses' collection
  @args[out]:
    the variable
  '''
  param = tf.get_variable(name, shape, initializer=initializer)
  if weight_decay is None:
    return param
  # register the L2 penalty so it can be summed into the total loss later
  penalty = tf.multiply(tf.nn.l2_loss(param), weight_decay, name="weight_loss")
  tf.add_to_collection('losses', penalty)
  return param


def conv_block(block_num,
               input_data,
               weights, 
               weight_initializer=tf.contrib.layers.xavier_initializer(),
               bias_initializer=tf.constant_initializer(0.0),
               conv_op=[1,1,1,1],
               conv_padding='SAME',
               weight_decay=None,
               lrn=True,
               dropout=1.0, 
               activation=True):
  '''
  @brief: convolutional block: dropout -> conv2d -> bias -> (lrn) -> (relu)
  @args[in]:
    block_num: block index, used to name the variable scope ('conv<block_num>')
    input_data: input tensor of the block
    weights: kernel shape; its 4th entry is also used as the bias shape
    weight_initializer: initializer of the kernel
    bias_initializer: initializer of the biases
    conv_op: strides passed to tf.nn.conv2d
    conv_padding: padding passed to tf.nn.conv2d
    weight_decay: optional L2 multiplier applied to the kernel only
    lrn: apply local response normalization when equal to True
    dropout: keep probability applied to the block input
    activation: apply relu when truthy, otherwise return the pre-activation
  @args[out]:
    output tensor of the block
  '''
  with tf.variable_scope('conv'+ str(block_num), reuse = tf.AUTO_REUSE) as scope:
    dropped = tf.nn.dropout(input_data, dropout)
    kernel = variable('weights', weights,
                      initializer=weight_initializer,
                      weight_decay=weight_decay)
    biases = variable('biases', weights[3],
                      initializer=bias_initializer,
                      weight_decay=None)
    convolved = tf.nn.conv2d(dropped, kernel, conv_op, padding=conv_padding)
    features = tf.nn.bias_add(convolved, biases)
    if lrn == True:
      features = tf.nn.lrn(features, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm')
    if not activation:
      return features
    return tf.nn.relu(features, name=scope.name)


def dense_block(block_num,
                input_data,
                neurons,
                weight_initializer=tf.contrib.layers.xavier_initializer(),
                bias_initializer=tf.constant_initializer(0.0),
                weight_decay=None,
                activation=True, 
                dropout=1.0):
  '''
  @brief: fully connected block: dropout -> matmul + bias -> (relu)
  @args[in]:
    block_num: block index, used to name the variable scope ('dense<block_num>')
    input_data: input tensor; its dimension 1 is used as the input width
    neurons: number of output units
    weight_initializer: initializer of the weight matrix
    bias_initializer: initializer of the biases
    weight_decay: optional L2 multiplier applied to the weight matrix only
    activation: apply relu when truthy
    dropout: keep probability applied to the block input
  @args[out]:
    output tensor of the block
  '''
  with tf.variable_scope('dense'+ str(block_num), reuse = tf.AUTO_REUSE) as scope:
    dropped = tf.nn.dropout(input_data, dropout)
    weights = variable('weights', [dropped.shape[1], neurons],
                       initializer=weight_initializer,
                       weight_decay=weight_decay)
    biases = variable('biases', [1,neurons],
                      initializer=bias_initializer,
                      weight_decay=None)
    projected = tf.matmul(dropped, weights) + biases
    if not activation:
      return projected
    return tf.nn.relu(projected, name=scope.name)
  
  
def multi_loss(logits, labels, batch_size, max_char):
  '''
  @brief: cross entropy loss averaged over the character positions, plus
          every loss already registered in the 'losses' collection
  @args[in]:
    logits: network output, sliced as logits[:, position, :]
    labels: one-hot labels, sliced as labels[:, :, position]
    batch_size: not used by this function
    max_char: number of character positions
  @args[out]:
    total_loss: sum of everything in the 'losses' collection
  @note: both the averaged cross entropy and total_loss itself are added to
         the 'losses' collection
  '''
  summed_loss = 0
  for char_idx in range(max_char):
    char_logits = logits[:, char_idx, :]
    char_labels = labels[:, :, char_idx]
    per_example = tf.nn.softmax_cross_entropy_with_logits(
        logits=char_logits, labels=char_labels)
    summed_loss += tf.reduce_mean(per_example, name='cross_entropy_loss_mean')
  mean_loss = summed_loss / max_char
  tf.add_to_collection('losses', mean_loss)
  total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
  tf.add_to_collection('losses', total_loss)
  return total_loss


def parameter_update(loss, learning_rate):
  '''
  @brief: optimization and parameter update using adam optimizer
  @args[in]:
    loss: tensor to minimize
    learning_rate: learning rate handed to the adam optimizer
  @args[out]:
    the op returned by minimize(); running it performs one update
  '''
  adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
  train_op = adam.minimize(loss)
  # one histogram summary per trainable variable
  for trainable in tf.trainable_variables():
    tf.summary.histogram(trainable.op.name, trainable)
  return train_op


def accuracy_calc(output, label_batch):
  '''
  @brief: calculate accuracy as the fraction of positions where the predicted
          class matches the labelled class
  @args[in]:
    output: network output; the class is taken as argmax over axis 2
    label_batch: one-hot labels; the class is taken as argmax over axis 1
  @args[out]:
    accuracy: mean of the element-wise matches
  '''
  predicted_ids = tf.cast(tf.argmax(output, 2), dtype=tf.int32)
  expected_ids = tf.cast(tf.argmax(label_batch, 1), dtype=tf.int32)
  matches = tf.equal(predicted_ids, expected_ids)
  return tf.reduce_mean(tf.cast(matches, "float"))

### Model

In [None]:
def inference(image_batch, class_count,
              dropout=[1,1,1,1],
              wd=None):
  '''
  @brief: define the architecture from the building blocks: four 3x3 conv
          blocks, each followed by a max pool, then a reshape to one class
          vector per character position
  @args[in]:
    image_batch: batch of single-channel images
    class_count: total number of classes; also sets the channel widths
    dropout: keep probability for the input of each conv block
    wd: optional weight decay forwarded to every conv block
  @args[out]:
    tensor of shape [batch, string_length, class_count]
  @note: string_length is read from the module-level variable of that name
  '''
  quarter = class_count // 4
  half = class_count // 2
  kernel_shapes = [[3, 3, 1, quarter],
                   [3, 3, quarter, half],
                   [3, 3, half, class_count],
                   [3, 3, class_count, class_count]]
  conv_strides = [[1, 1, 1, 1] for _ in range(4)]

  # three conv + 2x2 pool stages, each halving both spatial dimensions
  # (16x128, 8x64, 4x32 for a 32x256 input)
  net = image_batch
  for layer in range(3):
    net = conv_block(layer + 1, net, kernel_shapes[layer],
                     conv_op=conv_strides[layer], conv_padding='SAME',
                     dropout=dropout[layer], weight_decay=wd)
    net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                         padding='VALID', name='pool' + str(layer + 1))

  # last stage pools with a 4x2 window (1x16 for a 32x256 input)
  net = conv_block(4, net, kernel_shapes[3],
                   conv_op=conv_strides[3], conv_padding='SAME',
                   dropout=dropout[3], weight_decay=wd)
  net = tf.nn.max_pool(net, ksize=[1, 4, 2, 1], strides=[1, 1, 2, 1],
                       padding='VALID', name='pool4')

  return tf.reshape(net,
                    [tf.shape(image_batch)[0], string_length, class_count],
                    name='flat')

### Parameters

In [None]:
## paths ##
# folder in which the data (tfrecord) files are stored
folder_path = "../data"
# checkpoint to load from
checkpoint_restore = "../cp/ocr1_3to8"
# checkpoint to save to
checkpoint_save = "../cp/ocr1_3to8"

## data related params ##
# sort the files of folder_path into train / test lists by file name;
# a name containing "train" always goes to the train list
filenames = os.listdir(folder_path)
train_file_paths = []
test_file_paths = []
for filename in filenames:
    if "train" in filename:
        train_file_paths += [os.path.join(folder_path, filename)]
        continue
    if "test" in filename:
        test_file_paths += [os.path.join(folder_path, filename)]
# total number of data in each train tfrecord
# (the run cell multiplies this by the number of train files, so it must be defined)
data_per_train_file = 8192
# total number of data in each test tfrecord
data_per_test_file = 2048
# image size (rows, columns, channels)
image_size = [32,256,1]
# total number of classes
class_count = 63
# string length (including whitespace)
string_length = 16

## training setup related params ##
# restore from given checkpoint
restore = False
# dropout for each layer (1 means no drop)
dropout = [1, 1, 1, 1]
# weight decay
wd = 0.000
# learning rate
lr = 0.01
# batch size
# batch_size = 32
batch_size = 1
# total number of epochs
epochs = 5
# after every x epoch decrease learning rate by factor of y (var_lr = [x, y])
var_lr=[None,None]
# parameters related to reading tfrecord
num_of_threads=16
min_after_dequeue=5000
# queue capacity: the shuffle buffer plus room for one batch per reader thread and one extra
capacity=min_after_dequeue+(num_of_threads+1)*batch_size

### Run

In [None]:
# data count
train_data_count = data_per_train_file * len(train_file_paths)
test_data_count = data_per_test_file * len(test_file_paths)
# steps
train_step = train_data_count//batch_size
test_step = test_data_count//batch_size
# number of batches used to estimate the training accuracy after each epoch
train_acc_step = train_step//5
# a progress mark is printed every this many steps; max() avoids a modulo by
# zero when an epoch has fewer than two steps
progress_every = max(train_step//2, 1)
# learning rate currently in use; it is fed to the graph on every update so
# that the decay below actually reaches the optimizer
current_lr = lr
# build graph
with tf.Graph().as_default():
    # train graph
    x_train, y_train = minibatch(batch_size, train_file_paths, image_size, string_length, class_count)
    logit_train = inference(x_train, class_count, dropout = dropout, wd = wd)
    cost = multi_loss(logit_train, y_train, batch_size, string_length)
    lr_input = tf.placeholder(tf.float32, shape=[], name='learning_rate')
    update = parameter_update(cost, lr_input)
    accuracy_train = accuracy_calc(logit_train, y_train)
    # test graph (shares the variables of the train graph through AUTO_REUSE)
    x_test, y_test = minibatch(batch_size, test_file_paths, image_size, string_length, class_count)
    logit_test = inference(x_test, class_count)
    accuracy_test = accuracy_calc(logit_test, y_test)
    saver = tf.train.Saver()
    # start session
    with tf.Session() as sess:
        # initialize the variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            # restore the variables into the graph built above; going through
            # import_meta_graph would load a separate copy of the saved graph
            # instead of the variables that are being trained here
            if restore:
                saver.restore(sess, checkpoint_restore)
            # train for given number of epochs
            for e in range(epochs):
                print(bold + "\nepoch:" + end, e)
                train_epoch_cost = 0
                train_epoch_acc = 0
                test_epoch_acc = 0
                # train for given number of steps in one epoch
                for s in range(train_step):
                    _, train_batch_cost = sess.run([update, cost],
                                                   feed_dict={lr_input: current_lr})
                    if s % progress_every == 0 and s != 0:
                        print('~', end = '')
                    elif s == train_step - 1:
                        print('')
                    train_epoch_cost += train_batch_cost/train_step
                print(bold + "epoch_cost: " + end, train_epoch_cost)
                # mean accuracy over a fifth of the training batches
                for _ in range(train_acc_step):
                    train_epoch_acc += sess.run(accuracy_train)/train_acc_step
                print(bold + "train epoch accuracy: " + end, train_epoch_acc, "\n")
                # mean accuracy over the test set
                for _ in range(test_step):
                    test_epoch_acc += sess.run(accuracy_test)/test_step
                print(bold + "test epoch accuracy: " + end, test_epoch_acc, "\n")
                # after every x epoch decrease learning rate by factor of y (var_lr = [x, y])
                if var_lr[0] is not None and (e + 1) % var_lr[0] == 0:
                    current_lr = current_lr/var_lr[1]
            # save all the variables
            save_path = saver.save(sess, checkpoint_save)
        finally:
            # always stop the input queue threads, even if training failed
            coord.request_stop()
            coord.join(threads)
        print("---training over---")

# **Evaluate**

### Functions

In [None]:
 def decoding(encoded_data, type = 'logit'):
  '''
  @brief: decoding
  @args[in]:
  @args[out]:
  '''
  if(type == 'logit'):
    prediction = np.argmax(encoded_data, 2)
  elif(type == 'label'):
    prediction = np.argmax(encoded_data, 1)
  decoded_prediction = []
  for dp in prediction:
    predicted_text = ''
    for p in dp:
      predicted_text += all_chr[p]
    decoded_prediction.append(predicted_text)
  return decoded_prediction


def eval_vizualization(X):
  '''
  @brief: run the network restored from checkpoint_restore on a batch of
          images, then show every image followed by its decoded text
  @args[in]:
    X: batch of images indexed [batch, row, column, channel]
  @args[out]:
    nothing is returned; images are plotted and the text is printed
  '''
  logit = inference(X, class_count)
  init = tf.global_variables_initializer()
  saver = tf.train.Saver()

  with tf.Session() as sess:
    sess.run(init)
    # the restored values replace the freshly initialized ones
    saver.restore(sess, checkpoint_restore)
    decoded_text = decoding(sess.run(logit), type = 'logit')

  for idx, text in enumerate(decoded_text):
    # drop the channel axis so the image can be shown as gray scale
    frame = np.reshape(X[idx, :, :, :], image_size[0:2])
    plt.imshow(frame, cmap = 'gray')
    plt.show()
    print("text: ", text, '<---')
### Run

In [None]:
# number of batches needed to go through the test set once
steps = test_data_count//batch_size
# running mean of the per-batch accuracy
accu = 0
x_test, y_test = minibatch(batch_size, test_file_paths, image_size, string_length, class_count)
logit_test = inference(x_test, class_count)
accuracy_test = accuracy_calc(logit_test, y_test)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        # the restored values replace the freshly initialized ones
        saver.restore(sess, checkpoint_restore)
        for s in range(steps):
            acc = sess.run(accuracy_test)
            accu += acc/steps
        # report the mean over all batches, not the accuracy of the last batch
        print("test set accuracy: ", accu)
    finally:
        # always stop the input queue threads, even if evaluation failed
        coord.request_stop()
        coord.join(threads)

In [None]:
# class index -> character table: letters, then digits, then the space
all_chr = list(string.ascii_letters) + list(string.digits) + list(' ')
x_check, y_check=minibatch(batch_size, test_file_paths, image_size, string_length, class_count) 
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(tf.local_variables_initializer())
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(coord=coord) 
  try:
    x_c = sess.run(x_check)
    # show up to four images of the batch; slicing from index 0 still yields
    # an image when batch_size is 1 (x_c[1:5] would then be empty)
    eval_vizualization(x_c[:4])
  finally:
    # always stop the input queue threads, even if the visualization failed
    coord.request_stop()
    coord.join(threads)