# Image Tagging
1. TensorFlow (EfficientNet)
2. OpenCV (SIFT Feature Extraction)
3. PyTorch (ResNet50)

## 1. TensorFlow (EfficientNet)
Image tagging with the ImageNet-pretrained EfficientNetB0 model from `tensorflow.keras.applications`.

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image

In [2]:
# Load the ImageNet-pretrained EfficientNetB0 once at cell level. It is bound to
# a TensorFlow-specific name because the PyTorch section later in this notebook
# assigns its own network to the global `model`; sharing that name would make
# tf_tag_image call `.predict` on the PyTorch model when re-run afterwards.
tf_model = EfficientNetB0(weights='imagenet')


def tf_tag_image(img_path):
    """Print the top-5 ImageNet labels EfficientNetB0 predicts for an image.

    Args:
        img_path: Path to the image file to classify.

    Returns:
        None. Results are printed, one "<label> (Confidence: <score>)" per line.
    """
    # Load the image resized to 224x224 and add a leading batch axis.
    img = image.load_img(img_path, target_size=(224, 224))
    img_batch = np.expand_dims(image.img_to_array(img), axis=0)
    img_batch = preprocess_input(img_batch)

    predictions = tf_model.predict(img_batch)
    # decode_predictions returns one list per batch item; take the only one.
    top_labels = decode_predictions(predictions, top=5)[0]

    print('Predicted labels:')
    for _, name, score in top_labels:
        print(f'{name} (Confidence: {score:.2f})')

In [3]:
# Example usage: print the top-5 predicted labels for a sample image.

tf_tag_image('store-camera-2.jpg')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Predicted labels:
shopping_basket (Confidence: 0.48)
grocery_store (Confidence: 0.31)
confectionery (Confidence: 0.07)
shoe_shop (Confidence: 0.02)
tobacco_shop (Confidence: 0.02)


## 2. OpenCV (SIFT Feature Extraction)
SIFT only extracts local keypoint features from the image; it does not produce labels.

In [4]:
import cv2

In [5]:
def cv2_extract_feature(img_path):
    """Detect SIFT keypoints in an image and print how many were found.

    Args:
        img_path: Path to the image file; it is read as grayscale.

    Returns:
        None. The keypoint count is printed.

    Raises:
        OSError: If OpenCV cannot read an image from img_path.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than inside detectAndCompute.
    if img is None:
        raise OSError(f'Could not read image: {img_path!r}')

    sift = cv2.SIFT_create()
    # Descriptors are computed alongside the keypoints but not used here.
    keypoints, _descriptors = sift.detectAndCompute(img, None)

    print(f'Detected {len(keypoints)} keypoints in the image')

In [6]:
# Example usage: print the number of SIFT keypoints found in a sample image.
cv2_extract_feature('store-camera-2.jpg')

Detected 2507 keypoints in the image


## 3. PyTorch (ResNet50)
Image tagging with a pretrained ResNet50 from torchvision, used as-is for inference (no fine-tuning).

In [7]:
import torch
import torchvision.transforms as transforms
from PIL import Image
from torchvision import models
from torchvision.models import ResNet50_Weights

In [8]:
# Pretrained ResNet50, switched to evaluation mode (eval() returns the module).
model = models.resnet50(weights=ResNet50_Weights.DEFAULT).eval()

# Preprocessing: resize to 224x224, convert to a tensor, then normalize each
# of the three channels with a fixed mean and standard deviation.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Class names, one per line; list position is used as the class index.
with open("imagenet_classes.txt") as f:
    labels_map = [entry.strip() for entry in f]


def torch_tag_image(img_path, confidence_threshold=0.2):
    """Print every ResNet50 label whose softmax probability meets a threshold.

    Args:
        img_path: Path to the image file to classify.
        confidence_threshold: Minimum probability (inclusive) a class needs in
            order to be printed.

    Returns:
        None. Matching labels are printed in descending order of confidence.
    """
    # Normalize is configured for exactly three channels; converting to RGB
    # lets grayscale, palette and RGBA images through. The context manager
    # closes the file handle once the pixels have been turned into a tensor.
    with Image.open(img_path) as img:
        batch = transform(img.convert('RGB')).unsqueeze(0)

    with torch.no_grad():
        outputs = model(batch)

    # convert the logits of the single batch item to probabilities
    probabilities = torch.nn.functional.softmax(outputs[0], dim=0)

    # class indices ordered from most to least probable
    sorted_indices = torch.argsort(probabilities, descending=True)

    print(
        f'Predicted labels with confidence threshold {confidence_threshold}:')
    for idx in sorted_indices.tolist():
        confidence = probabilities[idx].item()
        if confidence < confidence_threshold:
            # Sorted descending, so every remaining class is below threshold too.
            break
        print(f'{labels_map[idx]} (Confidence: {confidence:.2f})')

In [9]:
# Example usage: print only the labels whose probability is at least 0.5.
torch_tag_image('store-camera-2.jpg', confidence_threshold=0.5)

Predicted labels with confidence threshold 0.5:
shopping basket (Confidence: 0.76)


# TODO:
1. Image captioning
2. Object detection