In [1]:
import os
import numpy as np
from PIL import Image
from skimage.metrics import structural_similarity as ssim
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image

# Load pre-trained VGG16 model + higher level layers
base_model = VGG16(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)

def extract_image_features(img_path):
    """
    Extract features from an image using VGG16.
    """
    img = image.load_img(img_path, target_size=(224, 224))
    img_data = image.img_to_array(img)
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)
    
    features = model.predict(img_data)
    return features.flatten()

def calculate_cosine_similarity(features1, features2):
    """
    Calculate the cosine similarity between two feature vectors.
    """
    return cosine_similarity([features1], [features2])[0][0]

def calculate_ssim(img_path1, img_path2):
    """
    Calculate the Structural Similarity Index (SSIM) between two images.
    """
    try:
        img1 = Image.open(img_path1).convert('L')  # Convert to grayscale
        img2 = Image.open(img_path2).convert('L')  # Convert to grayscale
        img1 = img1.resize((224, 224))
        img2 = img2.resize((224, 224))
        img1 = np.array(img1)
        img2 = np.array(img2)
        return ssim(img1, img2)
    except Exception as e:
        print(f"Error calculating SSIM between {img_path1} and {img_path2}: {e}")
        return 0.0

def calculate_combined_similarity(features1, features2, img_path1, img_path2, weight_cosine=0.5, weight_ssim=0.5):
    """
    Calculate combined similarity using cosine similarity of features and SSIM score.
    """
    cosine_sim = calculate_cosine_similarity(features1, features2)
    ssim_score = calculate_ssim(img_path1, img_path2)
    
    return weight_cosine * cosine_sim + weight_ssim * ssim_score

# Paths to training and test folders
train_folder = "./train"
test_folder = "./test"

# List of training and test images
train_images = ["2024.03.15_0954.jpg", "2024.03.15_1145.jpg", "Faller_8.jpg", "invoice_77073.jpg", "invoice_102856.jpg"]
test_images = ["invoice_77098.jpg", "invoice_102857.jpg"]

# Dictionary to store training images
database = {}

# Extract and store training images
for img_name in train_images:
    img_path = os.path.join(train_folder, img_name)
    features = extract_image_features(img_path)
    database[img_name] = features

# Compare test images to training images
for test_img in test_images:
    test_img_path = os.path.join(test_folder, test_img)
    test_features = extract_image_features(test_img_path)
    
    best_match = None
    highest_similarity = 0
    
    for train_img, train_features in database.items():
        train_img_path = os.path.join(train_folder, train_img)
        similarity = calculate_combined_similarity(test_features, train_features, test_img_path, train_img_path)
        if similarity > highest_similarity:
            highest_similarity = similarity
            best_match = train_img
    
    print(f"Test Image: {test_img}")
    print(f"Most Similar Image: {best_match}")
    print(f"Similarity Score: {highest_similarity}\n")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 301ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 157ms/step
Test Image: invoice_77098.jpg
Most Similar Image: invoice_77073.jpg
Similarity Score: 0.9984165075420612

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step
Test Image: invoice_102857.jpg
Most Similar Image: invoice_102856.jpg
Similarity Score: 0.9492248292722529

