## Decode Emorfi video

In [None]:
import os
import string
import random
import resize_image
import cv2
import shutil
import collections 
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras.preprocessing import image

import keras
from keras.models import load_model,Sequential
from keras.initializers import glorot_normal
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Reshape

from resources.data_utils import DataGenerator
from resources.utils import prediction_standardized,img_resize

In [None]:
def resize_image(input_dir, out_path, image_size, image_prefix):
    """Resize every image in ``input_dir`` whose name starts with ``image_prefix``.

    Each selected file is passed to ``img_resize`` and written under the same
    file name inside ``out_path``.

    Args:
        input_dir: Directory that holds the source images.
        out_path: Directory that receives the resized images.
        image_size: Target size forwarded unchanged to ``img_resize``
            (presumably the edge length in pixels -- confirm against
            ``resources.utils.img_resize``).
        image_prefix: Only file names starting with this prefix are processed;
            an empty string selects every file.
    """
    image_names = [name for name in os.listdir(input_dir) if name.startswith(image_prefix)]
    for image_name in tqdm(image_names, total=len(image_names), desc="Resizing images"):
        # Build the paths from the arguments rather than from notebook globals,
        # so the function works for any pair of directories.
        img_resize(
            in_path=os.path.join(input_dir, image_name),
            out_path=os.path.join(out_path, image_name),
            image_size=image_size,
        )

In [None]:
def build_model(nb_classes: int, image_length, seed=25):
    """Assemble the (uncompiled) convolutional classifier.

    Layer order: a reshape that appends a channel axis, two stages of
    5x5 'same' convolution -> max pooling -> ReLU (64 filters with 10x10
    pooling, then 128 filters with 8x8 pooling), a reshape to a 128-vector,
    a 50-unit ReLU dense layer and a softmax layer with ``nb_classes`` units.

    Args:
        nb_classes: Number of units in the softmax output layer.
        image_length: Edge length of the square single-channel input image.
        seed: Seed of the Glorot-normal initializer shared by all
            convolution and dense layers.

    Returns:
        The ``Sequential`` model; it is not compiled here.
    """
    init = glorot_normal(seed=seed)
    flat_image_shape = (image_length, image_length)

    net = Sequential()
    # Append the channel axis the convolutions operate on.
    net.add(Reshape(flat_image_shape + (1,), input_shape=flat_image_shape))

    # (filters, pooling edge) per convolution stage; pooling comes before ReLU.
    for filters, pool_edge in ((64, 10), (128, 8)):
        net.add(Conv2D(filters, kernel_size=(5, 5), padding='same', kernel_initializer=init))
        net.add(MaxPooling2D(pool_size=(pool_edge, pool_edge)))
        net.add(Activation('relu'))

    # The reshape below is declared for a 1x1x128 feature map.
    # NOTE(review): that appears to hold only when the two poolings shrink the
    # image to 1x1 (e.g. image_length=80) -- confirm before using other sizes.
    net.add(Reshape((128,), input_shape=(1, 1, 128)))
    net.add(Dense(50, activation='relu', kernel_initializer=init))
    net.add(Dense(nb_classes, activation='softmax', kernel_initializer=init))

    return net

In [None]:
def binary_label_to_decimal(labels: np.ndarray) -> np.ndarray:
    """Convert each row of ``labels`` to the 1-based index of its largest entry.

    Args:
        labels: Array whose first axis enumerates samples; every row is
            reduced with ``np.argmax`` (the first maximum wins on ties).

    Returns:
        Integer array of length ``labels.shape[0]`` holding ``argmax + 1``
        for each row.
    """
    row_count = labels.shape[0]
    # Class codes start at 1, hence the +1 on the zero-based argmax.
    one_based = (np.argmax(row) + 1 for row in labels)
    return np.fromiter(one_based, dtype=float, count=row_count).astype(int)

In [None]:
def majority_vote(predictions_dec,n):
    """Combine ``n`` consecutive repeats of a sequence by per-position vote.

    ``predictions_dec`` is read as ``n`` back-to-back runs, each of length
    ``len(predictions_dec) // n``. For every position the value that occurs
    most often across the runs is kept; on a tie the value seen first wins.
    Trailing items that do not fill a complete set of runs are ignored.

    Args:
        predictions_dec: Indexable sequence of hashable predictions.
        n: Number of repeats contained in ``predictions_dec``.

    Returns:
        List with one winning prediction per position.
    """
    text_length = len(predictions_dec) // n
    winners = []
    for pos in range(text_length):
        # Gather the prediction made for this position in each repeat.
        ballots = collections.Counter(
            predictions_dec[rep * text_length + pos] for rep in range(n)
        )
        winners.append(ballots.most_common(1)[0][0])
    return winners

In [None]:
# Working directories. Every path keeps its trailing '/' because file names
# are appended to it by plain string concatenation.
path = os.getcwd()+'/dataset/'
path_extract = path+'extract/'
path_prepocessed = path+"preprocessed_80/"

# Start each run from empty output directories. Only remove a directory that
# actually exists, so the very first run (nothing created yet) does not fail.
for work_dir in (path_extract, path_prepocessed):
    if os.path.isdir(work_dir):
        shutil.rmtree(work_dir)
    os.makedirs(work_dir)

In [None]:
# Extract images from video
video_name = 'movie.mp4'
cap = cv2.VideoCapture(path+video_name)

# Fail loudly when the video cannot be opened; otherwise the loop below would
# be skipped and zero frames would be extracted without any message.
if not cap.isOpened():
    cap.release()
    raise IOError('Could not open video: ' + path + video_name)

# Frames are written in read order as 1.jpg, 2.jpg, ... in grayscale.
i = 1
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No more frames could be read.
            break
        frame_gs = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cv2.imwrite(path_extract + str(i) + '.jpg', frame_gs)
        i += 1
finally:
    # Release the capture even if conversion or writing raised.
    cap.release()
cv2.destroyAllWindows()

In [None]:
# Resize every extracted frame to image_size x image_size (80 x 80) pixels.
# The empty prefix matches every file name in the extract directory.
image_prefix = ''
image_size = 80
resize_image(
    input_dir=path_extract,
    out_path=path_prepocessed,
    image_size=image_size,
    image_prefix=image_prefix,
)

In [None]:
# Load the trained model from dataset/model.h5.
path_model = path + 'model.h5'
# NOTE(review): the freshly built network is replaced by the load_model()
# result on the next line, so it looks unused -- confirm whether
# build_model() is still needed here (or whether loading weights into it
# was intended).
model = build_model(image_length=80, nb_classes=127)
model = load_model(path_model)

In [None]:
# Collect the preprocessed frames in numeric order of their file names
# (so 10.jpg comes after 9.jpg rather than after 1.jpg).
img_arr = os.listdir(path_prepocessed)
img_arr_sorted = list(img_arr)
img_arr_sorted.sort(key=lambda fname: int(os.path.splitext(fname)[0]))

decoded_text = []
# Full path of every frame, in the same order as img_arr_sorted.
img_arr_full = [path_prepocessed + fname for fname in img_arr_sorted]

In [None]:
# Get the true labels, i.e. the original text
text = 'hello world' # The original text encoded in patterns
splitted_text = list(text)

# Label space: every character of string.printable is assigned the 1-based
# index of its position; keyboard_dict_r is the inverse lookup.
keyboard = list(string.printable)
# Exactly one integer code per character (1 .. len(keyboard)).
keyboard_codes = np.arange(1, len(keyboard) + 1)
keyboard_dict = dict(zip(keyboard, keyboard_codes))
keyboard_dict_r = dict(zip(keyboard_codes, keyboard))

# A character outside the label space has no class; stop here with a clear
# message instead of silently producing a None label.
unsupported_chars = sorted(set(splitted_text) - set(keyboard_dict))
if unsupported_chars:
    raise ValueError('Characters without a label: ' + repr(unsupported_chars))
main_labels = [keyboard_dict[char] for char in splitted_text]

n = 5 # number of video repeats
# Labels for the whole video: the text's label sequence repeated n times.
main_labels_n = main_labels*n

In [None]:
# Decode: predict a class for every frame, then merge the n repeats of the
# video into one prediction per character by majority vote.
# pandas requires img_arr_full and main_labels_n to have the same length,
# i.e. the number of frames must equal n * len(text).
df_video = pd.DataFrame({"img_path": img_arr_full, "label": main_labels_n})
generation_params = {"dim": (80,80),"nb_classes": 127,"column_img": "img_path","column_label": "label"}
# batch_size equals the number of frames and shuffling is off, presumably so
# that all frames arrive in one ordered batch -- confirm against DataGenerator.
test_generator = DataGenerator(data_frame=df_video, batch_size=len(img_arr_full), shuffle=False, **generation_params)
# NOTE(review): predict_generator is deprecated in recent Keras/TensorFlow
# releases in favour of predict -- confirm against the installed version.
predictions = model.predict_generator(generator=test_generator)
predictions_dec = binary_label_to_decimal(prediction_standardized(predictions))
predictions_mv = majority_vote(predictions_dec,n)

# Compute accuracy
acc_test = accuracy_score(main_labels, predictions_mv)
print(acc_test)

# Translate the predicted class codes back to characters; '×' marks
# predictions beyond the printable characters.
predited_text_mv = ''.join(keyboard_dict_r.get(label, '×') for label in predictions_mv)

print("The original text:")
print(text)
print("The predicted text:")
print(predited_text_mv)
# Count the mismatches directly. Deriving the count from the float accuracy
# and truncating with int() can come out one too low due to rounding.
n_error = int(sum(1 for truth, pred in zip(main_labels, predictions_mv) if truth != pred))
print(str(n_error)+" characters were predicted wrong")