In [1]:
import os
import numpy as np
from PIL import Image
from skimage.metrics import structural_similarity as ssim
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image

# Build the feature extractor: VGG16 with ImageNet weights, truncated at its
# 'fc1' layer so that `model` outputs that layer's activations.
base_model = VGG16(weights='imagenet')
model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer('fc1').output,
)

def extract_image_features(img_path):
    """Return the flattened `model` prediction for the image at *img_path*.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, and run through ``preprocess_input`` before prediction.

    Any exception raised along the way is reported on stdout and an empty
    array is returned instead, so callers should check ``.size`` before
    using the result.
    """
    try:
        loaded = image.load_img(img_path, target_size=(224, 224))
        # Add a leading axis so the single image forms a batch of one.
        batch = np.expand_dims(image.img_to_array(loaded), axis=0)
        return model.predict(preprocess_input(batch)).flatten()
    except Exception as err:
        print(f"Error extracting features from {img_path}: {err}")
        return np.array([])

def calculate_ssim(img_path1, img_path2):
    """Return the SSIM of two images compared as 224x224 grayscale arrays.

    Both files are opened and converted to grayscale ('L' mode), resized to
    224x224, and turned into arrays before being handed to ``ssim``.

    Any exception is reported on stdout and 0.0 is returned instead.
    """
    target_size = (224, 224)
    try:
        # Each stage handles the first image before the second.
        grays = [Image.open(path).convert('L') for path in (img_path1, img_path2)]
        resized = [gray.resize(target_size) for gray in grays]
        first, second = (np.array(picture) for picture in resized)
        return ssim(first, second)
    except Exception as err:
        print(f"Error calculating SSIM between {img_path1} and {img_path2}: {err}")
        return 0.0

def calculate_combined_similarity(img_path1, img_path2, features1, features2, weight_ssim=0.5, weight_vgg16=0.5):
    """Return a weighted sum of SSIM and feature cosine similarity.

    Args:
        img_path1: Path of the first image, passed to ``calculate_ssim``.
        img_path2: Path of the second image, passed to ``calculate_ssim``.
        features1: Feature vector (NumPy array) for the first image.
        features2: Feature vector (NumPy array) for the second image.
        weight_ssim: Multiplier applied to the SSIM score.
        weight_vgg16: Multiplier applied to the cosine similarity.

    Returns:
        ``weight_ssim * ssim + weight_vgg16 * cosine``. The cosine term is
        0.0 when either feature vector is empty or has zero norm.
    """
    ssim_score = calculate_ssim(img_path1, img_path2)

    vgg16_sim = 0.0
    if features1.size and features2.size:
        norm_product = np.linalg.norm(features1) * np.linalg.norm(features2)
        # A zero-norm vector has no direction: dividing by it would produce
        # NaN (with a RuntimeWarning), and NaN compares False against every
        # score, so the pair would be silently dropped by callers. Treat it
        # as "no feature similarity" instead.
        if norm_product > 0:
            vgg16_sim = np.dot(features1, features2) / norm_product

    return weight_ssim * ssim_score + weight_vgg16 * vgg16_sim

# Folders holding the reference ("train") and query ("test") images.
train_folder = "./train"
test_folder = "./test"

# File names to index and to query, respectively.
train_images = [
    "2024.03.15_0954.jpg",
    "2024.03.15_1145.jpg",
    "Faller_8.jpg",
    "invoice_77073.jpg",
    "invoice_102856.jpg",
]
test_images = ["invoice_77098.jpg", "invoice_102857.jpg"]

# Index of reference images: file name -> its path and extracted features.
database = {}
for img_name in train_images:
    img_path = os.path.join(train_folder, img_name)
    features = extract_image_features(img_path)
    database[img_name] = {'path': img_path, 'features': features}

# For every query image, report the best-scoring reference image.
for test_img in test_images:
    test_img_path = os.path.join(test_folder, test_img)
    test_features = extract_image_features(test_img_path)

    # Score the query against each indexed image, in insertion order.
    scores = [
        (
            train_img,
            calculate_combined_similarity(
                test_img_path, data['path'], test_features, data['features']
            ),
        )
        for train_img, data in database.items()
    ]

    # Keep the first strictly-highest score. A candidate must score above 0
    # to be selected; otherwise best_match stays None.
    best_match, highest_similarity = None, 0
    for candidate, similarity in scores:
        if similarity > highest_similarity:
            best_match, highest_similarity = candidate, similarity

    print(f"Test Image: {test_img}")
    print(f"Most Similar Image: {best_match}")
    print(f"Similarity Score: {highest_similarity}\n")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
Test Image: invoice_77098.jpg
Most Similar Image: invoice_77073.jpg
Similarity Score: 0.9984165075420612

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step
Test Image: invoice_102857.jpg
Most Similar Image: invoice_102856.jpg
Similarity Score: 0.9492247696676082

