### This script validates the model on the validation dataset. The validation metric used is the normalized Discounted Cumulative Gain (nDCG).

In [None]:
from tensorflow.keras.models import load_model
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from numpy import dot
from numpy.linalg import norm
from webapp.utils import *

os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [None]:
# Load the pretrained model, then list the validation sketches and images.
weigths_path = '../pretrained/model.h5'
model = load_model(weigths_path)

# Both listings are sorted so that the i-th sketch is paired with the i-th
# image; this presumes the two folders use matching file names (TODO confirm).
x = sorted(os.listdir('../validatoin_dataset/extended_sketch/'))
y = sorted(os.listdir('../validatoin_dataset/extended_image/'))
input_output_association = dict(zip(x, y))

# Ranking cut-off for the metric, and the per-pair nDCG scores collected below.
top_k = 5
nDCGs = []

In [None]:
# For every (sketch, image) validation pair: embed both files, rank the stored
# image embeddings by cosine similarity to the sketch embedding, insert the
# paired image itself into that ranking, and score the top_k results with the
# normalized Discounted Cumulative Gain (nDCG). One score per pair is appended
# to nDCGs.
for sketch_name, image_name in zip(x, y):

    sketch_file = '../validatoin_dataset/extended_sketch/' + sketch_name
    image_file = '../validatoin_dataset/extended_image/' + image_name

    # get_embeddings returns a 3-tuple; the middle element is the embedding.
    # The image embedding must be kept: it is compared with the sketch
    # embedding below (it was previously discarded, causing a NameError).
    _, embedding_vector, _ = get_embeddings(sketch_file)
    class_name, embedding_image, style_ = get_embeddings(image_file)

    # Build the query selecting the stored embeddings whose style is one of
    # the predicted class names. A one-element tuple would render as
    # "('a',)", which is not valid SQL, hence the separate branch.
    # NOTE(review): values are interpolated into the SQL text; switch to a
    # parameterized query once the paramstyle of connect()'s driver is known.
    if len(class_name) == 1:
        sql_query = f"select * from image_embeddings where style in ('{class_name[0]}')"
    else:
        sql_query = f"select * from image_embeddings where style in {str(tuple(class_name))}"

    # Always release the connection, even when the query fails.
    conn, cursor = connect()
    try:
        df = pd.read_sql(sql_query, conn)
    finally:
        conn.close()

    # Similarity between the sketch embedding and every stored embedding.
    # Column 3 is parsed as a comma-separated list of floats.
    df['similarity'] = [
        cosine_similarity(
            np.fromstring(stored_embedding, dtype=float, sep=','),
            embedding_vector,
        )
        for stored_embedding in df.iloc[:, 3]
    ]
    df = df.sort_values(by='similarity', ascending=False)
    similarity_ = cosine_similarity(embedding_vector, embedding_image)

    # Insert the paired image into the ranking and flag as relevant
    # (ranking == 1) every row sharing its style; all other rows get 0.
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    rank = df.loc[:, ['style', 'similarity']].copy()
    tmp_ = pd.DataFrame({'style': [style_], 'similarity': [similarity_], 'ranking': [1]})
    rank = pd.concat([rank, tmp_], ignore_index=True)
    rank = rank.sort_values('similarity', ascending=False)
    rank = rank.fillna(0)
    rank.loc[rank['style'] == style_, 'ranking'] = 1

    # Logarithmic position discount: 1 / log2(position + 2), position from 0.
    weights = [1 / np.log2(position + 2) for position in range(rank.shape[0])]
    rank['weights'] = weights
    rank = rank.iloc[:top_k]

    # The ideal ranking treats every one of the kept positions as relevant.
    # NOTE(review): confirm this is the intended ideal; if only the paired
    # image should count as relevant, the ideal vector would be [1, 0, 0, ...].
    perfect_ranking = np.ones(rank.shape[0])

    # nDCG = DCG of the actual ranking / DCG of the ideal ranking.
    DCG = np.sum(rank['ranking'] * rank['weights'])
    iDCG = np.sum(perfect_ranking * np.array(weights[:top_k]))
    nDCG = DCG / iDCG

    nDCGs.append(nDCG)
