# Model Prediction Process
* Preprocess the test dataset
* Predict
* Fix predictions with voting
* Save results to CSV

################################################ Imports & Constants ####################################

In [1]:
import os
from config import config

# Settings read from the project configuration.
CODA_DLL_PATH = config["preprocessing"]["coda_dll_path"]  # presumably the CUDA DLL directory - confirm in config
H5_FILE = config["main"]["h5_file"]
CLASSES = config.get_classes()
IMG_SIZE = int(config["main"]["img_size"])

# Register the DLL directory before TensorFlow is imported below; workaround from
# https://github.com/tensorflow/tensorflow/issues/48868#issuecomment-841396124
# os.add_dll_directory exists only on Windows, so skip the call on other
# platforms instead of failing with AttributeError.
if hasattr(os, "add_dll_directory"):
    os.add_dll_directory(CODA_DLL_PATH)

import numpy as np
import tensorflow as tf

import preprocessing
import results
import vote

#####################################################################################################

In [2]:
# HDF5 file holding the unlabeled test set.
TEST_H5_FILE = "SynthText_test.h5"

# Slow step: the recorded run of this loader took roughly 12 minutes (~714 s).
X = preprocessing.load_unlabeled_dataset(TEST_H5_FILE)

# Preview the first five rows.
X.head(n=5)

[07:09:31] Creating unlabeled dataset started [h5_file=SynthText_test.h5]
[07:21:25] Function 'load_unlabeled_dataset' executed in 713.7733s


Unnamed: 0,img,char,word,img_name
0,"[[[105, 74, 57], [105, 74, 57], [105, 74, 57],...",s,say.,ant+hill_102.jpg_0
1,"[[[124, 91, 68], [124, 91, 68], [124, 91, 68],...",a,say.,ant+hill_102.jpg_0
2,"[[[123, 83, 62], [123, 83, 62], [123, 83, 62],...",y,say.,ant+hill_102.jpg_0
3,"[[[128, 68, 47], [128, 68, 47], [128, 68, 47],...",.,say.,ant+hill_102.jpg_0
4,"[[[183, 168, 151], [183, 168, 151], [183, 168,...",W,WPG,ant+hill_102.jpg_0


#### Normalize & prepare dataset

In [3]:
# Gather every entry of the "img" column into a single float32 array, then
# divide by 255 (presumably mapping 8-bit pixel values into [0, 1] - confirm
# this matches the scaling used at training time).
x_norm = np.array(X["img"].to_list(), dtype=np.float32)
x_norm /= 255

#### Load model & predict

In [4]:
# Saved Keras model file, resolved relative to the working directory.
FINAL_MODEL = "final_model.h5"

# Restore the model and print its layer summary.
model = tf.keras.models.load_model(filepath=FINAL_MODEL)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten (Flatten)           (None, 512)               0         
                                                                 
 batch_normalization (BatchN  (None, 512)              2048      
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 4096)              2101248   
                                                                 
 batch_normalization_1 (Batc  (None, 4096)             16384     
 hNormalization)                                                 
                                                                 
 dropout (Dropout)           (None, 4096)              0

In [5]:
# Run the loaded model on the normalized images; the voting correction is
# applied separately, in a later cell.
y_pred = model.predict(x=x_norm)

#### Fix predictions with voting & save to CSV

In [9]:
# Destination file for the final predictions.
RESULTS_CSV = "results.csv"

# Adjust the raw predictions with the project's voting step (presumably it
# reconciles predictions that belong to the same word/image - confirm in vote.py).
y_pred_after_votes = vote.vote(X, y_pred)

# Write the dataset together with the post-vote predictions to CSV.
results.to_csv(X, y_pred_after_votes, csv_file=RESULTS_CSV)

In [10]:
# Index of the largest value in each row of the post-vote predictions
# (presumably the predicted class index - confirm against the CLASSES ordering).
# NOTE(review): the recorded output shows only 3 in the visible head and tail;
# worth checking the full class distribution before trusting the results.
np.argmax(y_pred_after_votes, axis=1)

array([3, 3, 3, ..., 3, 3, 3], dtype=int64)