# This is the Jupyter Notebook for the MAGICODE project

### First install some modules that might not be installed

In [None]:
# Install a pip package in the current Jupyter kernel
# https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/
import sys
!{sys.executable} -m pip install tensorflow
# install sklearn to use train_test_split function
!{sys.executable} -m pip install sklearn
# install opencv to use cv2 module in get_preprocessed_img
!{sys.executable} -m pip install opencv-python

### Then import some libraries and modules that are needed for the code to run

In [None]:
import os
import sklearn.model_selection as model_selection
import glob
import shutil
import numpy as np
from pathlib import Path
from os.path import join

### Unzip the dataset

In [None]:
# join the volumes together into a single zip file
!zip -s 0 dataset.zip -O dataset_joined.zip
# unzip the newly assembled archive into the current folder
!unzip dataset_joined.zip -d ./

if os.listdir('dataset') :
    print ('files unzipped')

### Define some values used later

In [None]:
# sub-folders of the dataset folder that receive the training / evaluation split
TRAINING_SET_NAME = 'training_set'
EVALUATION_SET_NAME = 'eval_set'
# passed to Utils.get_preprocessed_img for every screenshot
# (presumably the target edge length in pixels - TODO confirm against the helper)
IMAGE_SIZE = 256
# batch size handed to the data generator; also used to derive steps_per_epoch
BATCH_SIZE = 64
# number of time steps of the textual input fed to the model
CONTEXT_LENGTH = 48
# marker tokens that are stripped from the sampled result before it is written to a .gui file
START_TOKEN = "<START>"
END_TOKEN = "<END>"
# NOTE(review): PLACEHOLDER and SEPARATOR are not referenced by the visible cells;
# presumably they are consumed by the helper classes - verify before removing
PLACEHOLDER = " "
SEPARATOR = '->'
# number of training epochs passed to model.fit
EPOCHS = 10

### Split dataset into training and evaluation sets

In [None]:
# folder that holds the unzipped dataset (pairs of <name>.png and <name>.gui files)
source = 'dataset'
# glob pattern matching every screenshot (e.g. 'dataset/*.png')
images_path = join(source, '*.png')
# glob order depends on the filesystem: sort so the split below always sees the same input order
img_files = sorted(glob.glob(images_path))
# keep only the bare file names (no folder, no extension)
img_files_without_extension = [Path(img_file).stem for img_file in img_files]

# randomly split the files into two sets (train_set = 85% of dataset, eval_set = 15% of dataset).
# The fixed random_state makes the split reproducible: re-running this cell copies the same files
# again instead of adding a different selection on top of the previous one, which would leak
# samples between the training and the evaluation folders.
train_set, eval_set = model_selection.train_test_split(
    img_files_without_extension, train_size=0.85, random_state=1234)

# copy every sample (img and gui) from the dataset folder into the folder of the set it belongs to
for set_name, file_stems in ((TRAINING_SET_NAME, train_set), (EVALUATION_SET_NAME, eval_set)):
    target = join(source, set_name)
    # create the set directory if it does not exist
    os.makedirs(target, exist_ok=True)
    for file in file_stems:
        for extension in ('.png', '.gui'):
            shutil.copyfile(join(source, file + extension), join(target, file + extension))

print('Training dataset: {}'.format(join(source, TRAINING_SET_NAME)))
print('Evaluation dataset: {}'.format(join(source, EVALUATION_SET_NAME)))


### Define some classes and functions that will be used a few times

### Transform training set into numpy arrays

In [None]:
# define source and destination folders
source = join('dataset', 'training_set')
destination = join('dataset', 'training_features')

# create the training_features directory if it does not exist
os.makedirs(destination, exist_ok=True)

# store every preprocessed training image as a compressed numpy archive next to its .gui file
# (smaller files, useful if uploading the set to train a model in the cloud)
for f in os.listdir(source):
    # only real screenshots: the name has to END with '.png', not merely contain it
    if not f.endswith('.png'):
        continue

    file_name = f[:-len('.png')]
    img = Utils.get_preprocessed_img(join(source, f), IMAGE_SIZE)

    # savez_compressed appends the '.npz' extension itself
    np.savez_compressed(join(destination, file_name), features=img)

    # read the archive back to verify the round trip; the context manager closes the
    # underlying file handle, which a bare np.load(...)[...] would leave open for every image
    with np.load(join(destination, file_name + '.npz')) as archive:
        retrieve = archive['features']

    assert np.array_equal(img, retrieve)

    shutil.copyfile(join(source, file_name + '.gui'), join(destination, file_name + '.gui'))


In [None]:
# folder that collects the files written at runtime; created only when nothing with that name exists yet
runtime_dir = 'bin'
if os.path.exists(runtime_dir) is False:
    os.mkdir(runtime_dir)

### Declare magicode class

In [None]:
from tensorflow.keras.layers import Input, Dense, Dropout, \
                         RepeatVector, LSTM, concatenate, \
                         Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.models import Sequential, Model, model_from_json
from tensorflow.keras.optimizers import RMSprop

from tensorflow.keras import *

class magicode:
    """Screenshot-to-token model: a CNN image encoder and an LSTM text encoder feeding an LSTM decoder.

    The compiled Keras model takes two inputs - an image of shape ``input_shape`` and a token
    sequence of shape ``(CONTEXT_LENGTH, output_size)`` - and outputs a softmax distribution
    over ``output_size`` classes.
    """

    def __init__(self, input_shape, output_size, output_path):
        """Build and compile the network.

        Args:
            input_shape: shape of one input image, forwarded to the first Conv2D layer.
            output_size: size of the last axis of the textual input and of the softmax output.
            output_path: folder in which ``save`` writes and ``load`` looks for model files.
        """
        self.model = None
        self.name = 'magicode'
        self.input_shape = input_shape
        self.output_size = output_size
        self.output_path = output_path

        # image encoder: three blocks of (conv, conv, max-pool, dropout) with 32 / 64 / 128 filters
        image_model = Sequential()
        image_model.add(Conv2D(32, (3, 3), padding='valid', activation='relu', input_shape=input_shape))
        image_model.add(Conv2D(32, (3, 3), padding='valid', activation='relu'))
        image_model.add(MaxPooling2D(pool_size=(2, 2)))
        image_model.add(Dropout(0.25))

        image_model.add(Conv2D(64, (3, 3), padding='valid', activation='relu'))
        image_model.add(Conv2D(64, (3, 3), padding='valid', activation='relu'))
        image_model.add(MaxPooling2D(pool_size=(2, 2)))
        image_model.add(Dropout(0.25))

        image_model.add(Conv2D(128, (3, 3), padding='valid', activation='relu'))
        image_model.add(Conv2D(128, (3, 3), padding='valid', activation='relu'))
        image_model.add(MaxPooling2D(pool_size=(2, 2)))
        image_model.add(Dropout(0.25))

        # two fully connected layers on top of the flattened feature maps
        image_model.add(Flatten())
        image_model.add(Dense(1024, activation='relu'))
        image_model.add(Dropout(0.3))
        image_model.add(Dense(1024, activation='relu'))
        image_model.add(Dropout(0.3))

        # repeat the image encoding once per time step so it can be concatenated with the text encoding
        image_model.add(RepeatVector(CONTEXT_LENGTH))

        visual_input = Input(shape=input_shape)
        encoded_image = image_model(visual_input)

        # text encoder: two stacked LSTMs that keep the full sequence
        language_model = Sequential()
        language_model.add(LSTM(128, return_sequences=True, input_shape=(CONTEXT_LENGTH, output_size)))
        language_model.add(LSTM(128, return_sequences=True))

        textual_input = Input(shape=(CONTEXT_LENGTH, output_size))
        encoded_text = language_model(textual_input)

        # decoder: both encodings joined, reduced by two LSTMs to one vector, then classified
        decoder = concatenate([encoded_image, encoded_text])

        decoder = LSTM(512, return_sequences=True)(decoder)
        decoder = LSTM(512, return_sequences=False)(decoder)
        decoder = Dense(output_size, activation='softmax')(decoder)

        self.model = Model(inputs=[visual_input, textual_input], outputs=decoder)

        # 'learning_rate' is the supported keyword; the legacy 'lr' alias is deprecated / rejected
        # by current Keras optimizers
        optimizer = RMSprop(learning_rate=0.0001, clipvalue=1.0)
        self.model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    def _artifact_path(self, name, extension):
        """Return '<output_path>/<name><extension>', the location of one saved model file."""
        return join(self.output_path, name + extension)

    def fit_generator(self, generator, steps_per_epoch):
        """Train for EPOCHS epochs on batches drawn from ``generator``, then save the model."""
        self.model.fit(generator, steps_per_epoch=steps_per_epoch, epochs=EPOCHS, verbose=1)
        self.save()

    def predict(self, image, partial_caption):
        """Run the model on one (image, partial_caption) input pair and return the first prediction."""
        return self.model.predict([image, partial_caption], verbose=0)[0]

    def save(self):
        """Write the architecture to '<name>.json' and the weights to '<name>.h5' in ``output_path``."""
        model_json = self.model.to_json()
        # the file name is '<name>.json': the extension belongs to the name, it is not a path component
        with open(self._artifact_path(self.name, '.json'), "w") as json_file:
            json_file.write(model_json)
        self.model.save_weights(self._artifact_path(self.name, '.h5'))

    def load(self, name=""):
        """Replace ``self.model`` with the model stored by ``save``.

        Args:
            name: base name of the stored files; an empty string means ``self.name``.
        """
        output_name = self.name if name == "" else name
        with open(self._artifact_path(output_name, '.json'), "r") as json_file:
            loaded_model_json = json_file.read()
        self.model = model_from_json(loaded_model_json)
        self.model.load_weights(self._artifact_path(output_name, '.h5'))

### Train the model using a generator (to avoid having to fit all the data in memory)

In [None]:
import tensorflow as tf

# seed numpy's global generator so the run is repeatable
np.random.seed(1234)

training_features = join('dataset', 'training_features')
output_path = join('bin')

# load the training features and store the dataset metadata and vocabulary in the runtime folder
dataset = Dataset()
dataset.load(training_features, generate_binary_sequences=True)
dataset.save_metadata(output_path)
dataset.voc.save(output_path)

gui_paths, img_paths = Dataset.load_paths_only(training_features)

input_shape = dataset.input_shape
output_size = dataset.output_size
# Keras expects an integer number of steps; '/' yields a float under Python 3.
# Floor division keeps only full batches (assumes dataset.size is a numeric sample count - TODO confirm)
steps_per_epoch = int(dataset.size // BATCH_SIZE)
voc = Vocabulary()
voc.retrieve(output_path)

# batches are produced lazily so the whole dataset never has to fit in memory
generator = Generator.data_generator(voc, gui_paths, img_paths, batch_size=BATCH_SIZE, generate_binary_sequences=True)

model = magicode(input_shape, output_size, output_path)

model.fit_generator(generator, steps_per_epoch=steps_per_epoch)

### Create the directories for storing screenshots to "decode" and the resulting code

In [None]:
# 'screenshots_to_convert' holds the images to be "decoded";
# 'generated_code' receives the GUI and HTML code produced from them
for folder in ('screenshots_to_convert', 'generated_code'):
    if not os.path.exists(folder):
        os.mkdir(folder)

### Generate the code for the provided screenshots (first add PNG files to the `screenshots_to_convert` folder)

In [None]:
trained_weights_path = 'bin'
trained_model_name = 'magicode'
input_path = 'screenshots_to_convert'
output_path = 'generated_code'

# the metadata file stores the model input shape and output size as its first two entries.
# NOTE: allow_pickle=True unpickles arbitrary objects - only load a file produced by this notebook
meta_dataset = np.load(join(trained_weights_path, 'meta_dataset.npy'), allow_pickle=True)
input_shape, output_size = meta_dataset[0], meta_dataset[1]

# rebuild the network and load the stored model files
model = magicode(input_shape, output_size, trained_weights_path)
model.load(trained_model_name)

sampler = Sampler(trained_weights_path, input_shape, output_size, CONTEXT_LENGTH)

for f in os.listdir(input_path):
    # skip everything whose name does not contain '.png'
    if '.png' not in f:
        continue

    evaluation_img = Utils.get_preprocessed_img(join(input_path,f), IMAGE_SIZE)

    # part of the file name in front of the first '.png'
    file_name = f.partition('.png')[0]

    result, _ = sampler.predict_greedy(model, np.array([evaluation_img]))
    print('Result greedy: {}'.format(result))

    # store the sampled code without its start / end markers
    with open(join(output_path, file_name + '.gui'), 'w') as out_f:
        out_f.write(result.replace(START_TOKEN, '').replace(END_TOKEN, ''))

### Declare compiler

In [None]:
import json
import string
import random



class Compiler:
    """Turn a DSL (.gui) file into markup by building a Node tree and rendering it with a JSON mapping."""

    def __init__(self, dsl_mapping_file_path):
        """Load the DSL mapping.

        Args:
            dsl_mapping_file_path: path of a JSON file; it has to provide the 'opening-tag' and
                'closing-tag' entries plus one entry per element key that gets rendered.
        """
        with open(dsl_mapping_file_path) as data_file:
            self.dsl_mapping = json.load(data_file)

        self.opening_tag = self.dsl_mapping['opening-tag']
        self.closing_tag = self.dsl_mapping['closing-tag']
        # the marker that a parent's mapping value uses for "children go here"
        self.content_holder = self.opening_tag + self.closing_tag

        self.root = Node('body', None, self.content_holder)

    def compile(self, input_file_path, output_file_path, rendering_function=None):
        """Compile one DSL file and write the rendered result to ``output_file_path``.

        Args:
            input_file_path: DSL file, read line by line.
            output_file_path: file that receives the rendered markup (overwritten).
            rendering_function: optional ``f(key, value)`` hook forwarded to ``Node.render``.
        """
        # start from an empty tree on every call: reusing the tree of a previous call would make
        # each output also contain the elements of all files compiled before it
        self.root = Node('body', None, self.content_holder)
        current_parent = self.root

        # the context manager closes the DSL file even when parsing fails
        with open(input_file_path) as dsl_file:
            for token in dsl_file:
                token = token.replace(' ', '').replace('\n', '')

                if token.find(self.opening_tag) != -1:
                    # "<key><opening-tag>": open a nested element and descend into it
                    token = token.replace(self.opening_tag, '')

                    element = Node(token, current_parent, self.content_holder)
                    current_parent.add_child(element)
                    current_parent = element
                elif token.find(self.closing_tag) != -1:
                    # closing tag: go back up to the enclosing element
                    current_parent = current_parent.parent
                else:
                    # any other line is a comma separated list of leaf elements
                    tokens = token.split(',')
                    for t in tokens:
                        element = Node(t, current_parent, self.content_holder)
                        current_parent.add_child(element)

        output_html = self.root.render(self.dsl_mapping, rendering_function=rendering_function)
        with open(output_file_path, 'w') as output_file:
            output_file.write(output_html)

class Node:
    """One element of the DSL tree: a mapping key, a link to its parent and an ordered list of children."""

    def __init__(self, key, parent_node, content_holder):
        self.key, self.parent = key, parent_node
        self.children = []
        self.content_holder = content_holder

    def add_child(self, child):
        """Append ``child`` as the last child of this node."""
        self.children.append(child)

    def show(self):
        """Print this node's key followed by the keys of all descendants, depth first."""
        print(self.key)
        for descendant in self.children:
            descendant.show()

    def render(self, mapping, rendering_function=None):
        """Return the markup of this node with the markup of its children embedded.

        ``mapping[self.key]`` supplies the node's own markup; ``rendering_function(key, value)``,
        when given, may rewrite it. For a node with children, every occurrence of the content
        holder in that markup is replaced by the concatenated markup of the children.
        """
        # children are rendered first, in insertion order
        rendered_children = ''.join(
            child.render(mapping, rendering_function) for child in self.children
        )

        markup = mapping[self.key]
        if rendering_function is not None:
            markup = rendering_function(self.key, markup)

        # a leaf keeps its markup untouched, content holder included
        if self.children:
            markup = markup.replace(self.content_holder, rendered_children)

        return markup

class CompilerUtils:
    """Helpers for filling rendered markup with dummy content."""

    @staticmethod
    def get_random_text(length_text=10, space_number=1, with_upper_case=True):
        """Return ``length_text`` random lowercase ASCII letters with some of them replaced by spaces.

        Args:
            length_text: total length of the returned string (spaces included).
            space_number: number of characters to replace with a space. Spaces are only placed on
                indices 2 .. length_text - 3, so the count is capped at the number of such indices
                (zero for texts shorter than 5 characters).
            with_upper_case: when True, the first letter and the letter that follows each
                inserted space are upper-cased.

        Returns:
            The generated text; an empty string when ``length_text`` is not positive.
        """
        results = []
        while len(results) < length_text:
            char = random.choice(string.ascii_lowercase)
            results.append(char)
        # guard: an empty text has no first letter to capitalise
        if with_upper_case and results:
            results[0] = results[0].upper()

        # a space never lands on the first two or the last two characters
        first_slot, last_slot = 2, length_text - 3
        available_slots = max(0, last_slot - first_slot + 1)
        # never ask for more distinct positions than exist, otherwise the retry loop could not end
        wanted_spaces = min(space_number, available_slots)

        current_spaces = []
        while len(current_spaces) < wanted_spaces:
            space_pos = random.randint(first_slot, last_slot)
            if space_pos in current_spaces:
                # position already taken: draw again instead of giving up with too few spaces
                continue
            results[space_pos] = " "
            if with_upper_case:
                # capitalise the letter that FOLLOWS the space (keeps that letter, changes only its case)
                results[space_pos + 1] = results[space_pos + 1].upper()

            current_spaces.append(space_pos)

        return ''.join(results)

### Compile the generated code

In [None]:
# when True, render_content_with_text swaps the text placeholder of an element for random text
FILL_WITH_RANDOM_TEXT = True
# marker inside a mapping value that gets replaced by the random text
TEXT_PLACE_HOLDER = '[]'

# JSON mapping read by Compiler; it has to provide the 'opening-tag' and 'closing-tag' entries
dsl_path = join('compiler','assets','dsl-mapping.json')
compiler = Compiler(dsl_path)

def render_content_with_text(key, value):
    """Rendering hook: replace the text placeholder in ``value`` with random text chosen by ``key``.

    Args:
        key: element key; the first matching rule below decides the shape of the random text.
        value: markup of the element.

    Returns:
        ``value`` with every TEXT_PLACE_HOLDER replaced, or ``value`` unchanged when
        FILL_WITH_RANDOM_TEXT is off or no rule matches ``key``.
    """
    if not FILL_WITH_RANDOM_TEXT:
        return value

    # Rules are checked in this order and the first hit wins.
    # NOTE(review): 'input-text' contains 'text', so it is served by the 'text' rule and only
    # 'input-password' ever reaches the text-input rule - confirm this is intended.
    if 'btn' in key:
        text_options = {}
    elif 'title' in key:
        text_options = dict(length_text=5, space_number=0)
    elif 'text' in key:
        text_options = dict(length_text=56, space_number=7, with_upper_case=False)
    elif 'input-text' in key or 'input-password' in key:
        text_options = dict(length_text=30, space_number=0)
    elif 'input-checkbox' in key or 'input-radio' in key:
        text_options = dict(length_text=10, space_number=0)
    else:
        return value

    return value.replace(TEXT_PLACE_HOLDER, CompilerUtils.get_random_text(**text_options))

path = 'generated_code'
# Only the .gui files are compiler input. The folder also collects the .html files written by
# earlier runs of this cell; listing everything would compile each .gui once per sibling file
# and fail on any file that has no matching .gui.
generated_code_files = sorted(name for name in os.listdir(path) if name.endswith('.gui'))

for file in generated_code_files:
    file_uid = Path(file).stem
    input_file_path = join(path, file_uid + '.gui')
    # the rendered page is written next to its source
    output_file_path = join(path, file_uid + '.html')

    compiler.compile(input_file_path, output_file_path, rendering_function=render_content_with_text)