In [None]:
import tensorflow as tf
import tensorflow_hub as hub
from keras.preprocessing import image
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply, RepeatVector, Dense, Activation, Lambda
from keras.models import Model

In [None]:
import matplotlib.pylab as plt
import os
from PIL import Image, ImageFilter
import h5py
import tarfile
from google.colab import files
import numpy as np
import cv2

In [None]:
import tensorflow.compat.v1 as tf2
# Configure TensorFlow through the v1 compatibility module: first switch the
# v2 behaviours off, then switch eager execution back on.
# NOTE(review): the two calls pull in opposite directions and the rest of the
# notebook uses the Keras functional API -- confirm this cell is still needed.
tf2.disable_v2_behavior()
tf2.enable_eager_execution()

In [None]:
# The dataset is obtained from the archive 'IIIT5K-Word_V3.0.tar.gz'.
#   -> The archive contains several files and two folders, train and test.
#   -> The files named traindata.m and testdata.m contain all the information
#      related to the train and test images and their annotations respectively.
os.chdir('/content')
# The context manager closes the archive even if extraction fails part-way.
with tarfile.open('IIIT5K-Word_V3.0.tar.gz') as tar:
    tar.extractall()

In [None]:
# Step into the extracted dataset folder and derive the absolute paths of the
# two splits from it.
os.chdir('IIIT5K')
base_dir = os.getcwd()
train_dir, test_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'test')
)
print(train_dir)

In [None]:
# Count the directory entries of each split (every entry is counted, whatever
# kind of file it is) and report both totals.
num_imgs_train, num_imgs_test = (
    len(os.listdir(split_dir)) for split_dir in (train_dir, test_dir)
)
print("number of images in training dataset is {}".format(num_imgs_train))
print("number of images in testing dataset is {}".format(num_imgs_test))

In [None]:
def rotateImages(rotationAmt):
  '''
   -> rotateImages is one of the image augmentation techniques used to
      enlarge the dataset.
   -> It writes a rotated copy of every image in the current working
      directory. Files whose name already contains "rot" are skipped, so the
      function never rotates its own output.
   -> Each copy is saved next to its source as "<name>rot<rotationAmt>.jpg",
      where <name> is the part of the file name before the first ".".

   ARGS **
   rotationAmt : ( Integer ) Rotation angle in degrees, handed to PIL's
                 Image.rotate ( ~ + or - 15 )
  '''
  for filename in os.listdir(os.getcwd()):
    if "rot" in filename:
      continue
    img_name = filename.split(".")[0]
    # The context manager releases the file handle even if rotate/save raises.
    with Image.open(filename) as img:
      # JPEG cannot store alpha or palette data; convert such sources first so
      # one unusual PNG does not abort the whole run.
      source = img.convert("RGB") if img.mode in ("RGBA", "LA", "P", "PA") else img
      rotimg = source.rotate(rotationAmt)
      rotimg.save(img_name + "rot" + str(rotationAmt) + ".jpg")


In [None]:
def addBlur():
  '''
   -> addBlur is an image augmentation helper: it writes a blurred copy
      (PIL ImageFilter.BLUR) of every image in the current working directory.
   -> Files whose name already contains "blur" are skipped, so running the
      function again does not blur its own output.
   -> Each copy is saved next to its source as "<name>blur.jpg", where <name>
      is the part of the file name before the first ".".
  '''
  for filename in os.listdir(os.getcwd()):
    if "blur" in filename:
      continue
    img_name = filename.split(".")[0]
    # The context manager releases the file handle even if filter/save raises.
    with Image.open(filename) as img:
      # Palette images cannot be filtered and JPEG cannot store alpha, so
      # convert those modes to RGB before blurring.
      source = img.convert("RGB") if img.mode in ("RGBA", "LA", "P", "PA") else img
      blur = source.filter(ImageFilter.BLUR)
      blur.save(img_name + "blur.jpg")


In [None]:
# Shell escape: print the current working directory.
!pwd

In [None]:
def preprocessing():
  '''
   -> Runs the augmentation steps on the current working directory:
      rotateImages with +15, rotateImages with -15, then addBlur.
  '''
  for angle in (15, -15):
    rotateImages(angle)
  addBlur()

In [None]:
# Augment both splits in place. The directories are addressed through the
# absolute train_dir / test_dir paths, so the cell does not depend on what the
# working directory happens to be when it is (re-)run, and the finally clause
# returns to the dataset root even when augmentation fails part-way.
for split_dir in (train_dir, test_dir):
  os.chdir(split_dir)
  try:
    preprocessing()
  finally:
    os.chdir(base_dir)

In [None]:
# Load InceptionResNetV2 with ImageNet weights, following https://arxiv.org/pdf/1704.03549.pdf

# Disabled TF-Hub alternative, kept for reference:
# URL = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"
#feature_extractor = hub.KerasLayer(URL,
#                                   input_shape=(IMAGE_RES, IMAGE_RES,3))

# include_top=True keeps the classification head on the loaded model.
main_model = tf.keras.applications.InceptionResNetV2(include_top=True, weights='imagenet', pooling=None)


In [None]:
# Freeze all weights of the pre-trained network and print its layer summary.
# No layer is removed here; the truncated sub-model is built in a later cell
# from the output of an intermediate layer.
main_model.trainable = False
main_model.summary()

In [None]:
def load_and_process_img(path_to_img, target_size=(299, 299)):
  '''
   -> Loads an image file, resizes it and returns it as a one-image batch.

   ARGS **
   path_to_img : ( String ) Path of the image file to load.
   target_size : ( Tuple ) (height, width) the image is resized to while
                 loading. Defaults to (299, 299).

   RETURNS **
   The array produced by img_to_array with a leading batch axis of length 1
   added in front.
  '''
  # load_img takes target_size as (height, width) only; the number of channels
  # follows from the colour mode, not from this tuple.
  img = image.load_img(path_to_img, target_size=target_size)
  img = image.img_to_array(img)
  # NOTE(review): pixel values are returned unscaled -- confirm whether the
  # network's preprocess_input should be applied before predict().
  return np.expand_dims(img, axis=0)

In [None]:
# Build a truncated network that stops at an intermediate layer and push one
# training image through it to obtain the shape of the extracted feature map.
other_layers = ['block8_1_conv', 'mixed_7a']  # candidate layers; only layer_name is used below
layer_name = 'mixed_7a'
layer_output = main_model.get_layer(layer_name).output
model_cnn = tf.keras.Model(inputs=main_model.input, outputs=layer_output)
# Address the sample through train_dir instead of chdir-ing into 'train', so
# the cell works whatever the current working directory is.
input_data = load_and_process_img(os.path.join(train_dir, '1009_2.png'))
result = model_cnn.predict(input_data)
print(list(result.shape))
# Flatten the spatial grid into one sequence axis:
# (m, n_H, n_W, n_C) -> (m, n_H * n_W, n_C)
(m,n_H,n_W,n_C) = result.shape
reshaped_result = tf.reshape(result, shape=(m,n_H*n_W,n_C))
print(reshaped_result.shape)
# Leave the working directory at the dataset root, as this cell always did.
os.chdir('/content/IIIT5K')

In [None]:
# Show the first square*square channels of the feature map as a grid.
square = 8
ix = 1
for _ in range(square):
    for _ in range(square):
        # select the subplot and hide its axis ticks
        ax = plt.subplot(square, square, ix)
        ax.set_xticks([])
        ax.set_yticks([])
        # plot one channel in grayscale
        plt.imshow(result[0, :, :, ix-1], cmap='gray')
        ix += 1
# Save BEFORE showing: once plt.show() has rendered the figure it is no longer
# the current figure, so a savefig() issued afterwards writes an empty image.
plt.savefig('Activations.png')
plt.show()

In [None]:
# Show square*square channels of the feature map as a grid, starting at
# channel index sf and walking downwards one channel per subplot.
square = 8
ix = 1
sf = 1087
for _ in range(square):
    for _ in range(square):
        # select the subplot and hide its axis ticks
        ax = plt.subplot(square, square, ix)
        ax.set_xticks([])
        ax.set_yticks([])
        # plot one channel in grayscale
        plt.imshow(result[0, :, :, sf], cmap='gray')
        ix += 1
        sf -= 1
# Save BEFORE showing: once plt.show() has rendered the figure it is no longer
# the current figure, so a savefig() issued afterwards writes an empty image.
# NOTE(review): the file name is the same one the previous grid cell writes,
# so this overwrites that image -- confirm that is intended.
plt.savefig('Activations.png')
plt.show()

In [None]:
def string_to_int(string, length, vocab):
    """Encode a string as a fixed-length list of vocabulary indices.

    The string is lower-cased, truncated to `length` characters, mapped
    character by character through `vocab`, and right-padded with the
    '<pad>' index up to `length`.

    Args:
        string: Text to encode.
        length: Number of entries in the returned list.
        vocab: Mapping from character to integer index. It must contain
            '<unk>' if any character of `string` is missing from it, and
            '<pad>' if padding is needed.

    Returns:
        A list of exactly `length` integer indices.

    Raises:
        KeyError: If '<unk>' or '<pad>' is needed but absent from `vocab`.
    """
    string = string.lower()[:length]
    # Unknown characters map to the *index* stored under '<unk>', not to the
    # literal string '<unk>', so the result stays purely numeric.
    rep = [vocab[ch] if ch in vocab else vocab['<unk>'] for ch in string]

    if len(rep) < length:
        rep += [vocab['<pad>']] * (length - len(rep))

    return rep

In [None]:
# Character -> integer index table (taken from an online source). The last
# two entries are the special tokens for unknown characters and for padding.
# NOTE(review): the letters 'k', 'q', 'x' and 'z' have no entry of their own --
# confirm whether that is intended.
vocab = {' ': 0, '.': 1, '/': 2, '0': 3, '1': 4, '2': 5, '3': 6, '4': 7, '5': 8, '6': 9, '7': 10, '8': 11, '9': 12, 'a': 13, 'b': 14, 'c': 15, 'd': 16, 'e': 17, 'f': 18, 'g': 19, 'h': 20, 'i': 21, 'j': 22, 'l': 23, 'm': 24, 'n': 25, 'o': 26, 'p': 27, 'r': 28, 's': 29, 't': 30, 'u': 31, 'v': 32, 'w': 33, 'y': 34, '<unk>': 35, '<pad>': 36}

In [None]:
def load_output(filename,T_Y,vocab, base_dir='/content/IIIT5K'):
  '''
   -> Reads a label file (one word per line) and encodes every word with
      string_to_int.
   -> The file is opened as os.path.join(base_dir, filename); the working
      directory is left untouched.

   ARGS **
   filename : ( String ) Name of the label file, resolved against base_dir.
   T_Y      : ( Integer ) Length every encoded label is cut or padded to.
   vocab    : ( Dict ) Character -> index mapping handed to string_to_int.
   base_dir : ( String ) Directory holding the label file. Defaults to
              '/content/IIIT5K'.

   RETURNS **
   numpy array with one row of T_Y indices per line of the file.
  '''
  label_path = os.path.join(base_dir, filename)
  # The context manager closes the file; rstrip removes only the line ending,
  # so a last line without a trailing newline keeps its final character.
  with open(label_path, "r") as text_file:
    words = [line.rstrip("\r\n") for line in text_file]

  return np.array([string_to_int(word, T_Y, vocab) for word in words])

In [None]:
# Encode the training labels; 20 is the fixed length each label is cut or
# padded to (the same value later assigned to T_Y).
Output_array = load_output("Train_labels.txt",20,vocab)

In [None]:
n_a = 64 # number of units for the pre-attention, bi-directional LSTM's hidden state 'a'
n_s = 120 # number of units for the post-attention, bi-directional LSTM's hidden state "s"
# Sequence length and per-step feature size are read off the reshaped CNN feature map.
T_X = list(reshaped_result.shape)[1]
feature_length = list(reshaped_result.shape)[2] 
T_Y = 20 # number of decoding steps the model loop runs
print(T_X,feature_length)

In [None]:
# Shared layers, defined once as globals so that every decoding step reuses
# the same weights.
repeat = RepeatVector(T_X)
concat = Concatenate(axis=-1)
dense1 = Dense(10, activation = "tanh")
dense2 = Dense(1, activation = "relu")
# dense2 yields one energy per input position, i.e. a tensor of shape
# (batch, T_X, 1). The attention weights must be normalised across the T_X
# positions (axis 1); a softmax over the default last axis would see a single
# value per position and return 1.0 everywhere.
activation = Lambda(lambda x: tf.nn.softmax(x, axis=1), name='attention_weights')
dot = Dot(axes = 1)

In [None]:
def attention(a, s_prev):
  '''
   -> Performs one attention step with the shared global layers.
   -> s_prev is passed through `repeat`, concatenated with `a`, scored by
      `dense1` then `dense2`, turned into weights by `activation`, and the
      weights are combined with `a` through `dot`.

   ARGS **
   a      : Output sequence of the pre-attention layer.
   s_prev : Previous hidden state of the post-attention layer.

   RETURNS **
   The context tensor produced by `dot`.
  '''
  s_tiled = repeat(s_prev)
  scores = dense2(dense1(concat([a, s_tiled])))
  weights = activation(scores)
  return dot([weights, a])

In [None]:
# Post-attention decoder cell. It has to be a plain (uni-directional) LSTM:
# the decoding loop unpacks three return values (output, hidden state, cell
# state) and passes exactly two initial states, whereas a Bidirectional wrapper
# with return_state=True returns five values and expects four initial states.
post_activation_LSTM_cell = LSTM(n_s, return_state = True)
# One softmax unit per vocabulary entry, so that every label index produced by
# string_to_int can actually be predicted.
output_layer = Dense(len(vocab), activation="softmax")

In [None]:
def seq2seq_model(T_X, T_Y, n_a, n_s,feature_length):
  '''
   -> Builds the attention-based sequence model on top of the shared global
      layers (attention, post_activation_LSTM_cell, output_layer).

   ARGS **
   T_X            : ( Integer ) Length of the input feature sequence.
   T_Y            : ( Integer ) Number of decoding steps / model outputs.
   n_a            : ( Integer ) Units of the pre-attention bi-directional LSTM.
   n_s            : ( Integer ) Size of the initial state inputs s0 and c0.
   feature_length : ( Integer ) Number of features per input step.

   RETURNS **
   A Keras Model taking [X, s0, c0] and returning a list of T_Y outputs, one
   per decoding step.
  '''
  X = Input(shape=(T_X, feature_length))
  s0 = Input(shape=(n_s,), name='s0')
  c0 = Input(shape=(n_s,), name='c0')
  s, c = s0, c0
  outputs = []

  # Pre-attention encoder over the whole input sequence.
  a = Bidirectional(LSTM(n_a, return_sequences=True))(X)
  for step in range(T_Y):
    # Context for this step, then one decoder step seeded with the states
    # carried over from the previous step.
    context = attention(a, s)
    s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])
    outputs.append(output_layer(s))

  return Model(inputs=[X, s0, c0], outputs=outputs)

In [None]:
# Build the model from the hyper-parameters defined above.
model = seq2seq_model(T_X, T_Y, n_a, n_s, feature_length)

In [None]:
  
  # Scratch variant of the seq2seq_model body, run at cell level with debug
  # prints. It rebinds the global `model` at the end of the cell.
  X = Input(shape=(T_X, feature_length))
  print("X is ",X)
  s0 = Input(shape=(n_s,), name='s0')
  c0 = Input(shape=(n_s,), name='c0')
  s = s0
  c = c0
  print("S is ",s)
  print("C is ",c)
    
  outputs = []

  a = Bidirectional(LSTM(n_a, return_sequences = True))(X)
  print("a and s are ",a,s)

  for t in range(T_Y):
        context = attention(a,s)
        # NOTE(review): the initial state here is a pair of constant tensors
        # (zeros of shape [1, 120] and a batched 120x120 identity) instead of
        # the carried s and c, so c0 is never fed into any layer in this cell
        # -- confirm whether this experiment should be kept.
        s, _, c = post_activation_LSTM_cell(context, initial_state = [tf.zeros([1, 120]), tf.eye(120, batch_shape=[1])])
        out = output_layer(s)
        outputs.append(out)

  model = Model(inputs = [X,s0,c0], outputs = outputs)

In [None]:
    # Second scratch copy of the seq2seq_model body, run at cell level with
    # debug prints. It stops after the decoding loop and builds no Model.
    X = Input(shape=(T_X, feature_length))
    print("X is ",X)
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    s = s0
    c = c0
    print("S is ",s)
    print("C is ",c)
    
    # Collects one output tensor per decoding step.
    outputs = []
    
    # Step 1: run the pre-attention Bi-LSTM over the whole input sequence.
    a = Bidirectional(LSTM(n_a,return_sequences=True))(X)
    print("a and s are ",a,s)
    # Step 2: iterate over the T_Y decoding steps.
    for t in range(T_Y):
    
        # Step 2.A: one attention step gives the context for step t.
        context = attention(a,s)
        print(context)
        # Step 2.B: feed the context to the post-attention LSTM, seeded with
        # initial_state = [hidden state, cell state] from the previous step.
        s, _, c = post_activation_LSTM_cell(context, initial_state = [s,c])
        
        # Step 2.C: apply the output Dense layer to the new hidden state.
        out = output_layer(s)
        
        # Step 2.D: keep this step's output.
        outputs.append(out)

In [None]:
# Clone the text_renderer repository into the current working directory.
!git clone "https://github.com/oh-my-ocr/text_renderer"

In [None]:
# Step into the clone; the path is relative to the current working directory.
os.chdir("text_renderer")

In [None]:
# Install the package from its setup.py in development mode.
!python3 setup.py develop

In [None]:
# Install the requirements listed in the repository's docker/requirements.txt.
!pip3 install -r docker/requirements.txt

In [None]:
# Run the renderer with the example configuration shipped in the repository.
!python3 main.py \
    --config example_data/example.py \
    --dataset img \
    --num_processes 2 \
    --log_period 10

In [None]:
# Pack /content/output into Dataset.zip.
# NOTE(review): presumably this is where the renderer wrote its images -- confirm.
!zip -r Dataset.zip /content/output

In [None]:
# Download the archive through the Colab files helper.
files.download('Dataset.zip')

In [None]:
# Clone only the python3 branch of the SynthText repository.
!git clone --single-branch --branch python3 "https://github.com/ankush-me/SynthText.git" 