<a href="https://colab.research.google.com/github/vivorima/CBIR-NearDuplicateDetection/blob/main/Near_Duplicates.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
from scipy.spatial.distance import cosine
import pandas as pd
import torch
import torch.nn as nn
import torch.hub

In [None]:
# Mount Google Drive at /content/drive so the dataset path below resolves.
from google.colab import drive
drive.mount('/content/drive')

# Directory containing the images to compare (every later cell reads from here)
images_directory = '/content/drive/MyDrive/test_dataset'
# Presumably an alternative local (non-Colab) dataset location, kept for reference:
# "..\corpus_lipade\presse\photos\jpg"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def preprocess_image(image_path):
    """Load one image file and convert it into a normalized model input batch.

    The image is converted to grayscale replicated over 3 channels, resized so
    its shorter side is 256, center-cropped to 224x224, converted to a tensor
    and normalized with the ImageNet mean/std.

    Args:
        image_path: path of the image file to open.

    Returns:
        torch.Tensor: a batch of one image with shape (1, 3, 224, 224).
    """
    preprocess = transforms.Compose([
        # Grayscale, but replicated over 3 channels: the tensor keeps the
        # 3-channel layout, so the network's first convolution (and its
        # pretrained weights) can be used unchanged.
        transforms.Grayscale(num_output_channels=3),

        # this is how images are preprocessed when trained on Imagenet
        transforms.Resize(256),
        transforms.CenterCrop(224),

        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Image.open keeps the underlying file open until the image is closed.
    # The context manager releases the handle as soon as the pixel data has
    # been turned into a tensor, so looping over a large directory does not
    # accumulate open file descriptors.
    with Image.open(image_path) as input_image:
        input_tensor = preprocess(input_image)

    # create a mini-batch as expected by the model
    return input_tensor.unsqueeze(0)

def get_features(image_batch, model):
    """Run ``model`` on ``image_batch`` without gradients and return a NumPy array.

    The batch is moved to the device holding the model's parameters, so the
    call works whether the model lives on CPU or GPU. When the model exposes
    no parameters, the previous rule applies: use CUDA if it is available.

    Args:
        image_batch: tensor accepted by ``model``.
        model: callable applied to the batch, typically a ``torch.nn.Module``.

    Returns:
        numpy.ndarray: the model output copied to host memory.
    """
    parameters = getattr(model, 'parameters', None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None

    if first_param is not None:
        # Follow the model: avoids a device-mismatch error when CUDA exists
        # but the model was left on the CPU (or the other way round).
        image_batch = image_batch.to(first_param.device)
    elif torch.cuda.is_available():
        image_batch = image_batch.to('cuda')

    # Inference only: no autograd graph is needed for feature extraction.
    with torch.no_grad():
        features = model(image_batch)
    return features.cpu().numpy()


def compare_images(feature1, feature2, threshold=0.85):
    """Tell whether two feature arrays are similar enough to be a match.

    Both arrays are flattened to 1-D, their cosine similarity is computed as
    one minus the cosine distance, and the result is compared to ``threshold``.

    Returns:
        True when the similarity is strictly greater than ``threshold``.
    """
    vec_a = feature1.flatten()
    vec_b = feature2.flatten()
    distance = cosine(vec_a, vec_b)
    return (1 - distance) > threshold

# Load the pre-trained ResNet model

In [None]:
# Load ResNet-152 from the torchvision hub repository pinned at v0.10.0,
# requesting the pretrained weights.
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet152', pretrained=True)

# Disabled experiment: replacing the first convolution with a 1-channel one.
# Not needed while preprocess_image emits 3-channel grayscale tensors.
# # Original first layer: 3 input channels, 64 output channels, kernel size 7, stride 2, padding 3, bias=False
# model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Rebuild the network from every child module except the last one, so the
# forward pass returns features instead of class scores.
model = torch.nn.Sequential(*(list(model.children())[:-1]))  # drop the final (classification) child
# Put the model in evaluation mode before extracting features.
model.eval()
# Use the GPU when one is available; otherwise the model stays on the CPU.
if torch.cuda.is_available():
    model.to('cuda')

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


# Extracting features of all images

In [None]:
# Map each image file name to the feature array produced by the model.
image_features = {}
# os.listdir returns entries in arbitrary order; sorting makes the dictionary
# order (and therefore every table derived from it) reproducible across runs.
for image_name in sorted(os.listdir(images_directory)):
    # Case-insensitive match so files saved as ".JPG" are not silently skipped.
    if image_name.lower().endswith(".jpg"):
        image_path = os.path.join(images_directory, image_name)
        image_batch = preprocess_image(image_path)
        features = get_features(image_batch, model)
        image_features[image_name] = features

# Compare each image to every other image

In [None]:
# For every reference image, collect the names of all *other* images whose
# features pass compare_images. Each pair is evaluated in both directions.
similar_images = {
    reference_name: [
        candidate_name
        for candidate_name, candidate_features in image_features.items()
        if reference_name != candidate_name
        and compare_images(reference_features, candidate_features)
    ]
    for reference_name, reference_features in image_features.items()
}

# Convert the results to a DataFrame for easy export to Excel

In [None]:
# One row per reference image; its matches are flattened into a single
# ', '-separated string so each row fits in one spreadsheet cell.
df = pd.DataFrame(list(similar_images.items()), columns=['Reference Image', 'Similar Images'])
df['Similar Images'] = df['Similar Images'].map(', '.join)

# Write the predictions to Google Drive, without the row index
df.to_excel('/content/drive/MyDrive/similar_images.xlsx', index=False)

# Formatting the data

In [None]:
import pandas as pd

# Load the ground truth dataset
file_path_ground_truth = '/content/drive/MyDrive/test_dataset/donnees_IS_new.xlsx'  # Replace with your file path
ground_truth_df = pd.read_excel(file_path_ground_truth)

# Consolidate every column after the first into one ', '-separated string,
# skipping empty cells. This has to run BEFORE any derived column is added:
# a column appended first would fall inside iloc[:, 1:] and list each
# reference image among its own similar images.
ground_truth_df['Similar Images'] = [
    ', '.join(row.dropna().astype(str))
    for _, row in ground_truth_df.iloc[:, 1:].iterrows()
]

# Set the first column as the reference image
ground_truth_df['Reference Image'] = ground_truth_df.iloc[:, 0]

# Create a new DataFrame with the desired two-column structure
reshaped_ground_truth_df = ground_truth_df[['Reference Image', 'Similar Images']]

# Save the reshaped DataFrame to a new Excel file (read back by the evaluation cell)
reshaped_ground_truth_df.to_excel('/content/drive/MyDrive/reshaped_ground_truth.xlsx', index=False)


In [None]:
def _parse_image_list(cell):
    """Turn one ', '-separated 'Similar Images' cell into a set of image names.

    Missing values (None / NaN, which is how an empty spreadsheet cell is read
    back) and blank strings yield an empty set, instead of a phantom 'nan' or
    '' entry that would be counted as an image.
    """
    if cell is None or (not isinstance(cell, str) and pd.isna(cell)):
        return set()
    names = (name.strip() for name in str(cell).split(', '))
    return {name for name in names if name}


def calculate_metrics(ground_truth, predictions):
    """Compute precision, recall and F1 of predicted similar images.

    Args:
        ground_truth: DataFrame with columns 'Reference Image' and
            'Similar Images' (a ', '-separated string, possibly empty/NaN).
        predictions: DataFrame with the same two columns.

    Returns:
        tuple: (precision, recall, f1_score). Each value is 0 when its
        denominator is zero.

    Note:
        Only reference images that appear in ``predictions`` are scored; a
        reference image missing from ``ground_truth`` is treated as having no
        similar images.
    """
    # Initialize counters
    tp, fp, fn = 0, 0, 0

    # Reference image -> set of truly similar images, for efficient lookup
    raw_ground_truth = ground_truth.set_index('Reference Image')['Similar Images'].to_dict()
    ground_truth_dict = {ref: _parse_image_list(cell) for ref, cell in raw_ground_truth.items()}

    # Iterate through each reference image in the predictions
    for _, row in predictions.iterrows():
        predicted_similar = _parse_image_list(row['Similar Images'])
        actual_similar = ground_truth_dict.get(row['Reference Image'], set())

        # Accumulate TP, FP, and FN over all reference images
        tp += len(predicted_similar & actual_similar)
        fp += len(predicted_similar - actual_similar)
        fn += len(actual_similar - predicted_similar)

    # Calculate precision, recall, and F1 score (guarding against division by zero)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return precision, recall, f1_score

In [None]:
# Load the datasets
# Both files are produced by earlier cells: the reshaped ground truth and the
# predictions export. Note that ground_truth_df is rebound here and replaces
# the frame of the same name created in the reshaping cell.
file_path_ground_truth = '/content/drive/MyDrive/reshaped_ground_truth.xlsx'  # Replace with your file path
file_path_predictions = '/content/drive/MyDrive/similar_images.xlsx'  # Replace with your file path

ground_truth_df = pd.read_excel(file_path_ground_truth)
predictions_df = pd.read_excel(file_path_predictions)

# Calculate the metrics (counts are accumulated over all reference images in the predictions)
precision, recall, f1 = calculate_metrics(ground_truth_df, predictions_df)

# Report the three scores
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Precision: 0.048879837067209775
Recall: 0.35294117647058826
F1 Score: 0.08586762075134169


## **RESNET 101**
# Threshold: 0.9
* Precision: 0.05851063829787234
* Recall: 0.16176470588235295
* F1 Score: 0.0859375


# Threshold: 0.89
* Precision: 0.05963302752293578
* Recall: 0.19117647058823528
* F1 Score: 0.09090909090909093

# Threshold: 0.88
* Precision: **0.061567164179104475**
* Recall: 0.2426470588235294
* F1 Score: **0.0982142857142857**

# Threshold: 0.87
* Precision: 0.056338028169014086
* Recall: 0.27941176470588236
* F1 Score: 0.09376927822331894

# Threshold: 0.85
* Precision: 0.043254817987152035
* Recall: **0.3713235294117647**
* F1 Score: 0.0774836977368623


## **RESNET 152**
# Threshold: 0.88
* Precision: 0.06626506024096386
* Recall: 0.2426470588235294
* F1 Score: 0.10410094637223975

# Threshold: 0.85
* Precision: 0.048879837067209775
* Recall: 0.35294117647058826
* F1 Score: 0.08586762075134169

# Notes
* trier et prendre les '10' plus similaires
* pas de threshold unique
* augmentations, transformations aléatoires  ? yes
lesquelles ?
* fine-tuning sur les classes, évaluer sur une partie du test dataset (mini testset)
(tâche de classification, utiliser la cross-entropy)
* captions ? not for resnet
* début janvier
* score ndcg pour prendre en compte le classement
* t-SNE ??? colorier les classes pour la présentation

* faire un tri et couper chaque 10 à 9 images
* la classe qui a le plus d'images pour le fait de couper
* Protocole d'évaluation précis
* changer bert par un autre
* SNI
* nDCG : prendre en compte la position des images, au lieu de l'average precision