# This is the Jupyter Notebook for the MAGICODE project - TEST

### First we import some libraries and modules that we will need for the code to run

In [1]:
import os
import sys
import sklearn.model_selection as model_selection
import glob
import shutil
from pathlib import Path
from os.path import join
os.chdir('datasets')

### Then we install the modules that are (usually) not installed by default

In [None]:
# install sklearn to use train_test_split function
!pip install sklearn
# install opencv to use cv2 module in get_preprocessed_img
!pip install opencv-python
!pip install tensorflow

### Unzip the dataset

In [None]:
# join the volumes together into a single zip file
!zip -s 0 dataset.zip -O dataset_joined.zip
# unzip the newly assembled archive
!unzip dataset_joined.zip -d ./all_data
print ('files unzipped')

### Split dataset into training and evaluation sets

In [21]:
# define source folder
source = join('all_data', 'dataset')
# get all file paths
all_files = os.listdir(source)
# build a generic image path (e.g. 'all_data/dataset/*.png')
images_path = os.path.join(source, '*.png')
# get all images paths
img_files = glob.glob(images_path)
# remove files extension from files paths
img_files_without_extension = [Path(img_file).stem for img_file in img_files]

# training set will be 6 times the size of the evaluation set
distribution=6
TRAINING_SET_NAME = 'training_set'
EVALUATION_SET_NAME = 'eval_set'

# splits randomly the files into two sets
# Path(*).stem returns the file name without extension
train_set,eval_set = model_selection.train_test_split(img_files_without_extension, train_size=(distribution / 10))

# create the TRAINING_SET_NAME and EVALUATION_SET_NAME directories if they do not exist
if not os.path.exists(join(source, TRAINING_SET_NAME)):
    os.makedirs(join(source, TRAINING_SET_NAME))
if not os.path.exists(os.path.join(source, EVALUATION_SET_NAME)):
    os.makedirs(join(source, EVALUATION_SET_NAME))

# copy the files (img and gui) from the all_data folder into the training_set folder
for file in train_set:
    shutil.copyfile(join(source, file + '.png'), join(source, TRAINING_SET_NAME, file + '.png'))
    shutil.copyfile(join(source, file + '.gui'), join(source, TRAINING_SET_NAME, file + '.gui'))

# copy the files (img and gui) from the all_data folder into the eval_set folder
for file in eval_set:
    shutil.copyfile(join(source, file + '.png'), join(source, EVALUATION_SET_NAME, file + '.png'))
    shutil.copyfile(join(source, file + '.gui'), join(source, EVALUATION_SET_NAME, file + '.gui'))

print('Training dataset: {}'.format(join(source, TRAINING_SET_NAME)))
print('Evaluation dataset: {}'.format(join(source, EVALUATION_SET_NAME)))


### Transoform training set into numpy arrays

In [29]:
from model.classes.Utils import *

IMAGE_SIZE = 256

#define source and destination folders
source = join('all_data', 'dataset', 'training_set')
destination = join('all_data', 'dataset', 'training_features')

# create the training_features directory if it does not exist
if not os.path.exists(destination):
    os.makedirs(destination)

# transform images in training dataset (i.e. normalized pixel values and resized pictures) to numpy arrays (smaller files, useful if uploading the set to train a model in the cloud)
for f in os.listdir(source):
    if f.find('.png') != -1:
        img = Utils.get_preprocessed_img(join(source, f), IMAGE_SIZE)
        file_name = f[:f.find('.png')]

        np.savez_compressed(join(destination, file_name), features=img)
        retrieve = np.load(join(destination, file_name + '.npz'))['features']
        
        assert np.array_equal(img, retrieve)
        
        shutil.copyfile(join(source, file_name + '.gui'), join(destination, file_name + '.gui'))


In [30]:
os.chdir('../')
if not os.path.exists('bin'):
    os.mkdir('bin')
os.chdir('model')

### Train the model using a generator (to avoid having to fit all the data in memory)

In [41]:
import tensorflow as tf

#sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

from classes.dataset.Generator import *
from classes.model.pix2code import *

np.random.seed(1234)
BATCH_SIZE = 64

training_features = join('..', 'datasets', 'all_data', 'dataset', 'training_features')
output_path = join('..', 'bin')

dataset = Dataset()
dataset.load(training_features, generate_binary_sequences=True)
dataset.save_metadata(output_path)
dataset.voc.save(output_path)

gui_paths, img_paths = Dataset.load_paths_only(training_features)

input_shape = dataset.input_shape
output_size = dataset.output_size
steps_per_epoch = dataset.size / BATCH_SIZE
voc = Vocabulary()
voc.retrieve(output_path)

generator = Generator.data_generator(voc, gui_paths, img_paths, batch_size=BATCH_SIZE, generate_binary_sequences=True)

model = pix2code(input_shape, output_size, output_path)

model.fit_generator(generator, steps_per_epoch=steps_per_epoch)

2.4.0
Loading data...
Generating sparse vectors...
Dataset size: 75787
Vocabulary size: 26
Input shape: (256, 256, 3)
Output size: 26
Parsing data...


### Create the directories for storing screenshots to "decode" and the resulting code

In [None]:
# step out of model directory
os.chdir('..')
# create directory to store images to be "decoded"
if not os.path.exists('screenshots'):
    os.mkdir('screenshots')
# create directory to store code generated by "decoding" images in screenshots folder 
if not os.path.exists('code'):
    os.mkdir('code')

### Generate the code for provided screenshots

In [None]:
from classes.Sampler import *
from classes.model.pix2code import *

trained_weights_path = 'bin'
trained_model_name = 'pix2code'
input_path = 'screenshots'
output_path = 'code'

CONTEXT_LENGTH = 48

meta_dataset = np.load(join(trained_weights_path, 'meta_dataset.npy'))
input_shape = meta_dataset[0]
output_size = meta_dataset[1]

model = pix2code(input_shape, output_size, trained_weights_path)
model.load(trained_model_name)

sampler = Sampler(trained_weights_path, input_shape, output_size, CONTEXT_LENGTH)

for f in os.listdir(input_path):
    if f.find(''.png') != -1:
        evaluation_img = Utils.get_preprocessed_img(join(input_path,p), IMAGE_SIZE)

        file_name = f[:f.find(''.png')]

        result, _ = sampler.predict_greedy(model, np.array([evaluation_img]))
        print('Result greedy: {}'.format(result))

        with open(join(output_path, file_name + '.gui'), 'w') as out_f:
            out_f.write(result.replace(START_TOKEN, '').replace(END_TOKEN, ''))