In [None]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

In [None]:
# Name of the TFDS dataset to load
dataset_name = "tf_flowers"

# Slice specs for the training / validation / test subsets (80% / 10% / 10% of 'train')
split_specs = ('train[:80%]', 'train[80%:90%]', 'train[90%:]')

# with_info=True also returns the dataset metadata object;
# as_supervised=True makes every element an (image, label) pair
(train_ds, validation_ds, test_ds), metadata = tfds.load(
    dataset_name,
    split=list(split_specs),
    with_info=True,
    as_supervised=True
)

# Report what was loaded
print("Dataset loaded successfully.")
print(f"Dataset name: {metadata.full_name}")
for split_title, split_spec in zip(("training", "validation", "test"), split_specs):
    print(f"Number of {split_title} examples: {metadata.splits[split_spec].num_examples}")
label_feature = metadata.features['label']
print(f"Number of classes: {label_feature.num_classes}")
print(f"Class names: {label_feature.names}")

# Preview the first nine training images in a 3x3 grid, each titled with its class name
print("\nDisplaying a few example images:")
class_names = metadata.features['label'].names
fig = plt.figure(figsize=(10, 10))
for position, (image, label) in enumerate(train_ds.take(9), start=1):
    # Subplot positions are 1-based, hence start=1 above
    ax = fig.add_subplot(3, 3, position)
    ax.imshow(image)
    ax.set_title(class_names[label.numpy()])
    ax.axis("off")
plt.show()

# Define image dimensions for preprocessing
IMG_WIDTH = 224
IMG_HEIGHT = 224
# Number of images per batch in every dataset pipeline
BATCH_SIZE = 32


def preprocess_image(image, label):
    """Resize an image and apply the input preprocessing VGG16 expects.

    The datasets built here are later fed to a VGG16 network loaded with
    ImageNet weights, so the pixels must be transformed the same way as during
    that network's training rather than merely scaled to [0, 1].

    Args:
        image: Image tensor as yielded by the dataset (pixel values in 0-255).
        label: Label associated with the image; passed through untouched.

    Returns:
        A tuple `(image, label)` where `image` is a float32 tensor of shape
        (IMG_HEIGHT, IMG_WIDTH, channels) after VGG16 preprocessing.
    """
    # tf.image.resize expects size as (new_height, new_width)
    image = tf.image.resize(image, (IMG_HEIGHT, IMG_WIDTH))
    image = tf.cast(image, tf.float32)
    # VGG16 preprocessing: RGB -> BGR and per-channel ImageNet mean subtraction
    # on 0-255 inputs (no rescaling to [0, 1])
    image = tf.keras.applications.vgg16.preprocess_input(image)
    return image, label


def prepare_dataset(dataset, batch_size=BATCH_SIZE):
    """Apply `preprocess_image`, then batch and prefetch `dataset`."""
    return dataset.map(preprocess_image).batch(batch_size).prefetch(tf.data.AUTOTUNE)


# Apply preprocessing, batch, and prefetch the datasets for efficient training.
# NOTE: this rebinds the dataset names, so re-running the cell without
# reloading the data would preprocess already-preprocessed images.
train_ds = prepare_dataset(train_ds)
validation_ds = prepare_dataset(validation_ds)
test_ds = prepare_dataset(test_ds)

print("\nPreprocessing applied: images resized and normalized.")
print("Datasets batched and prefetched for efficient training.")

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.models import Model

# Convolutional backbone: VGG16 with pre-trained ImageNet weights.
# include_top=False drops the classification head; input_shape fixes the
# expected image size to (IMG_HEIGHT, IMG_WIDTH, 3).
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
)

# Flatten the backbone's output so each image yields a single 1-D feature vector
x = Flatten()(base_model.output)

# Model mapping an input image batch to its flattened VGG16 features
feature_extraction_model = Model(inputs=base_model.input, outputs=x)

print("Feature Extraction Model Summary:")
feature_extraction_model.summary()

In [None]:
import numpy as np

def extract_features(model, dataset):
  """
  Runs a model over every batch of a dataset and collects features and labels.

  `predict_on_batch` is used instead of `predict` because the data is consumed
  one batch at a time in a Python loop; `predict` is meant to be handed a whole
  dataset and would also emit a progress bar for every single batch.

  Args:
    model: Model exposing `predict_on_batch(images)` that returns a NumPy array
           of features for the batch.
    dataset: Iterable yielding `(images, labels)` batches, where `labels`
             provides a `.numpy()` method.

  Returns:
    A tuple `(features, labels)` of NumPy arrays, each the concatenation of the
    per-batch results along axis 0.
  """
  feature_batches = []
  label_batches = []
  for images, labels in dataset:
    feature_batches.append(model.predict_on_batch(images))
    label_batches.append(labels.numpy())
  return np.concatenate(feature_batches, axis=0), np.concatenate(label_batches, axis=0)


print("Extracting features from training dataset...")
train_features, train_labels = extract_features(feature_extraction_model, train_ds)

print("Extracting features from validation dataset...")
validation_features, validation_labels = extract_features(feature_extraction_model, validation_ds)

print("Extracting features from test dataset...")
test_features, test_labels = extract_features(feature_extraction_model, test_ds)

print("\nFeature extraction complete.")
print(f"Training features shape: {train_features.shape}")
print(f"Training labels shape: {train_labels.shape}")
print(f"Validation features shape: {validation_features.shape}")
print(f"Validation labels shape: {validation_labels.shape}")
print(f"Test features shape: {test_features.shape}")
print(f"Test labels shape: {test_labels.shape}")

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def get_image_similarity(query_features, dataset_features):
  """
  Scores one query feature vector against every row of a feature matrix.

  Args:
    query_features: NumPy array holding the query image's feature vector.
    dataset_features: NumPy array whose rows are the dataset images' feature vectors.

  Returns:
    A 1-D NumPy array with one cosine similarity score per dataset row.
  """
  # cosine_similarity works on 2-D inputs, so present the query as a single row
  query_row = query_features.reshape(-1)[np.newaxis, :]
  # The result has shape (1, n_dataset_rows); return its only row
  return cosine_similarity(query_row, dataset_features)[0]

In [None]:
import numpy as np

def recommend_similar_images(query_features, dataset_features, dataset_info, num_recommendations):
  """
  Finds the most similar images in a dataset to a query image.

  Args:
    query_features: A NumPy array representing the feature vector of the query image.
    dataset_features: A NumPy array representing the feature vectors of the dataset images.
    dataset_info: A list or array containing information (e.g., labels, paths)
                  for each image in the dataset, corresponding to dataset_features.
    num_recommendations: The number of top similar images to return. Must not be
                         negative; if it exceeds the dataset size, every dataset
                         entry is returned.

  Returns:
    A list containing the information (from dataset_info) of the top
    num_recommendations most similar images, most similar first.

  Raises:
    ValueError: If num_recommendations is negative.
  """
  # A negative count would turn the slice below into "all but the last k"
  # and silently return almost the whole dataset, so reject it up front.
  if num_recommendations < 0:
    raise ValueError(
        f"num_recommendations must be non-negative, got {num_recommendations}"
    )

  # Get cosine similarity scores between the query image and all dataset images
  similarity_scores = get_image_similarity(query_features, dataset_features)
  # argsort sorts in ascending order; reverse it so the best matches come first,
  # then keep only the requested number of indices
  top_indices = np.argsort(similarity_scores)[::-1][:num_recommendations]
  # Look up the information (labels in this notebook) for the recommended images
  return [dataset_info[i] for i in top_indices]

In [None]:
# Seed for the query sampling so that repeated runs evaluate the same test images
QUERY_SAMPLING_SEED = 42
# Define the number of random test images to use as queries
num_test_queries = 10
# Select random, distinct indices from the test set with a seeded generator
query_rng = np.random.default_rng(QUERY_SAMPLING_SEED)
random_indices = query_rng.choice(len(test_features), size=num_test_queries, replace=False)
# Get the features and labels for the selected random query images
query_features_subset = test_features[random_indices]
query_labels_subset = test_labels[random_indices]

print(f"Selected {num_test_queries} random images from the test set as queries.")

# Define the number of recommendations to get for each query
num_recommendations = 10
# One entry per query: its own label plus the labels of its recommended images
recommendations = []

print(f"Getting {num_recommendations} recommendations from the training dataset for each query...")
# Pair every query feature vector with its label and search the training set
for query_feature, query_label in zip(query_features_subset, query_labels_subset):
    recommended_info = recommend_similar_images(
        query_feature,
        train_features, # Use training features as the dataset to search for similar images
        train_labels,   # Use training labels to get information about recommended images
        num_recommendations
    )
    # Store the query label and the labels of the recommended images
    recommendations.append({
        'query_label': query_label,
        'recommended_labels': recommended_info
    })

print("Recommendations obtained for all queries.")

In [None]:
# Per-query precision: fraction of recommended images sharing the query's label
precision_scores = []

print("Calculating precision for each query...")
for rec in recommendations:
    target_label = rec['query_label']
    # Keep only the recommendations whose label matches the query's label
    matching = [lbl for lbl in rec['recommended_labels'] if lbl == target_label]
    # Precision = relevant recommendations / requested recommendations
    precision_scores.append(len(matching) / num_recommendations)

# Mean of the per-query precision values
average_precision = np.mean(precision_scores)

print("\nEvaluation Results:")
print(f"Number of test queries: {num_test_queries}")
print(f"Number of recommendations per query: {num_recommendations}")
print(f"Precision scores for each query: {precision_scores}")
print(f"Average Precision: {average_precision:.4f}")

print("\nDiscussion:")
# Pick a short interpretation based on which band the average precision falls in
if average_precision > 0.5:
    discussion = "The recommendation system shows reasonably good performance with an average precision above 0.5, meaning more than half of the recommendations on average are of the same class as the query image."
elif average_precision > 0.2:
    discussion = "The recommendation system shows moderate performance. There is room for improvement to increase the proportion of relevant recommendations."
else:
    discussion = "The recommendation system's performance is relatively low. Further model training, architecture tuning, or exploring different similarity metrics might be needed."
print(discussion)

print(f"\nBased on the average precision of {average_precision:.4f}, the system's ability to retrieve visually similar images from the same class is evaluated.")