### Import Libraries

In [1]:
import os
import json
import numpy as np
from tqdm import tqdm
import re

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.models import load_model

from keras.models import Model
from keras.layers import Dense, Embedding, LSTM, Input, concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint

import cv2 as cv

import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, Input, Embedding, \
    LSTM, Bidirectional, Lambda, Concatenate, Add
from keras.layers.convolutional import Conv2D, MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization, regularizers
from keras.optimizers import Adam, RMSprop
import gc
import subprocess
import pickle

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### Loading Files

In [2]:
# os.listdir returns entries in arbitrary, platform-dependent order. Sort them so
# the image order (and every artefact derived from it) is reproducible across runs.
image_files = sorted(os.listdir('./images'))

In [3]:
# Peek at the first directory entry to see the file-naming pattern.
image_files[0]

'CLEVR_new_000000.png'

In [4]:
# Number of entries found in ./images.
len(image_files)

14791

In [5]:
# JSON is UTF-8 by specification; pass the encoding explicitly so decoding does
# not depend on the platform's locale default.
with open('./Quest_Answers.json', encoding='utf-8') as f:
    que_ans = json.load(f)['quest_answers']

# Show the first record to document the schema (Question / Image / Answer).
que_ans[0]

{'Question': 'There is a metal thing that is in front of the gray thing right of the big blue shiny sphere; how many rubber cubes are in front of it?',
 'Image': 'CLEVR_new_000000',
 'Answer': 0}

In [6]:
# Total number of question/answer records loaded.
len(que_ans)

135020

### Preprocessing Questions

In [7]:
# Answer vocabulary partitioned by answer type. Every value is a string because
# answers are stringified before being looked up in this table.
groups_dict = {
    # counting answers: '0' .. '8'
    'number': [str(count) for count in range(9)],
    'material': ['rubber', 'metal'],
    'color': [
        'cyan', 'blue', 'yellow', 'purple',
        'red', 'green', 'gray', 'brown',
    ],
    'shape': ['sphere', 'cube', 'cylinder'],
    'size': ['large', 'small'],
    # yes/no answers: 'True' / 'False'
    'exist': [str(flag) for flag in (True, False)],
}

In [8]:
def get_group(answer):
    """Return the name of the answer group that contains ``answer``.

    Looks ``answer`` up in the module-level ``groups_dict`` and returns the key
    of the first group whose value list contains it. Returns ``None`` when no
    group contains the answer.
    """
    matching_groups = (
        group_name
        for group_name, members in groups_dict.items()
        if answer in members
    )
    return next(matching_groups, None)

In [9]:
# Flatten the question records into four parallel lists; index i in each list
# refers to the same record of que_ans.
questions = [record['Question'] for record in que_ans]
images = [record['Image'] for record in que_ans]

# Answers are stringified (the first record, for example, holds the integer 0)
# so that they can be matched against the string entries of groups_dict.
answers = [str(record['Answer']) for record in que_ans]
groups = [get_group(answer) for answer in answers]

In [10]:
# Word-level tokenizer: vocabulary capped via num_words=100, text lower-cased,
# split on single spaces, with the characters in `filters` removed.
tokenizer = Tokenizer(
    num_words=100,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=' ',
)
tokenizer.fit_on_texts(questions)

# Encode each question as a sequence of word indices, then bring every sequence
# to length 40 (the displayed result shows the zero padding on the left).
questions_tokenized = tokenizer.texts_to_sequences(questions)
questions_padded = pad_sequences(questions_tokenized, maxlen=40)
questions_padded

array([[ 0,  0,  0, ..., 27,  3, 47],
       [ 0,  0,  0, ..., 38, 31,  7],
       [ 0,  0,  0, ..., 13,  2, 47],
       ...,
       [ 0,  0,  0, ..., 41, 17,  6],
       [ 0,  0,  0, ..., 16, 17, 44],
       [ 0,  0,  0, ...,  1, 39,  7]])

In [11]:
# Persist the padded question matrix. The context manager closes the file even
# if pickle.dump raises.
with open("questions.pkl", "wb") as fh:
    pickle.dump(questions_padded, fh)

In [12]:
# Persist the stringified answers. The context manager closes the file even if
# pickle.dump raises.
with open("answers.pkl", "wb") as fh:
    pickle.dump(answers, fh)

In [13]:
# Persist the per-question image identifiers. The context manager closes the
# file even if pickle.dump raises.
with open("image_labels.pkl", "wb") as fh:
    pickle.dump(images, fh)

In [14]:
# Persist the per-question answer-group names. The context manager closes the
# file even if pickle.dump raises.
with open("groups.pkl", "wb") as fh:
    pickle.dump(groups, fh)

In [15]:
# Persist the fitted tokenizer so the same word index can be reused later. The
# context manager closes the file even if pickle.dump raises.
with open("tokenizer.pkl", "wb") as fh:
    pickle.dump(tokenizer, fh)

### Preprocessing Images

In [16]:
# Read every file in ./images, convert it to RGB and downscale it.
# images_processed[i] is the pixel data for image_labels_unique[i].
images_processed = []
image_labels_unique = []
images_skipped = []  # entries that could not be decoded or processed

for image in tqdm(image_files):
    image_path = os.path.join("./images/", image)

    # cv.imread does not raise on an unreadable or non-image file; it returns None.
    image_bgr = cv.imread(image_path)
    if image_bgr is None:
        images_skipped.append(image)
        continue

    try:
        image_read = cv.cvtColor(image_bgr, cv.COLOR_BGR2RGB)
        # dsize is (width, height): each result is 60 rows by 80 columns.
        image_resized = cv.resize(image_read, (80, 60))
    except cv.error:
        # Still best-effort, but only OpenCV failures are skipped, so that
        # KeyboardInterrupt and programming errors are no longer swallowed.
        images_skipped.append(image)
        continue

    images_processed.append(image_resized)
    # NOTE(review): labels stored here keep the file extension (e.g.
    # 'CLEVR_new_000000.png') while the question records use the bare name
    # ('CLEVR_new_000000') - confirm the downstream join accounts for this.
    image_labels_unique.append(image)

if images_skipped:
    print("Skipped {} unreadable file(s), e.g. {}".format(
        len(images_skipped), images_skipped[:5]))

100%|██████████████████████████████████████████████████████████████████████████| 14791/14791 [00:14<00:00, 1049.30it/s]


In [17]:
# Stack the resized images into a single float32 array and divide by 255.
images_array = np.asarray(images_processed, dtype=np.float32) / 255.0

In [18]:
# Per-pixel standardisation: statistics are taken over the image axis (axis 0),
# so mean_img and std_dev each have the shape of a single image.
mean_img = images_array.mean(axis=0)
std_dev = images_array.std(axis=0)

# A pixel that is identical in every image has zero variance; dividing by it
# would fill the output with NaN/inf. Use a divisor of 1 there, so that those
# pixels become exactly 0 after mean subtraction.
std_dev[std_dev == 0] = 1.0

images_normalized = (images_array - mean_img) / std_dev

In [19]:
# Persist the file names of the successfully loaded images, in the same order as
# the image array. The context manager closes the file even if pickle.dump raises.
with open("image_labels_unique.pkl", "wb") as fh:
    pickle.dump(image_labels_unique, fh)

In [20]:
# Persist the standardised image array. The context manager closes the file even
# if pickle.dump raises part-way through this large write.
with open("image_data_unique.pkl", "wb") as fh:
    pickle.dump(images_normalized, fh)