In [1]:
import os
import clip
import tqdm
import torch
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
import torch.nn as nn
from os.path import join
from os.path import isfile, join
from torch.autograd import Variable
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
images=pd.read_pickle('images.pkl')
keywords=pd.read_pickle('keywords.pkl')

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

In [4]:
def get_image_vector(image_path: str) -> np.ndarray:
   image = Image.open(image_path).convert('RGB')
   image_tensor = preprocess(image).unsqueeze(0).to(device)

   with torch.no_grad():
       features = model.encode_image(image_tensor)

   # Convert PyTorch tensor to NumPy array
   feature_vector = features.cpu().numpy()

  # Convert the NumPy array to a list of floats
   feature_vector = feature_vector.astype(float).tolist()
   feature_vector=feature_vector[0]
   return feature_vector

In [5]:
client = QdrantClient(host="localhost", port=6333)

In [6]:
client.recreate_collection(
    collection_name="image_search",
    vectors_config=VectorParams(size=512, distance=Distance.COSINE),
)

ResponseHandlingException: [Errno 111] Connection refused

In [8]:
image_directory = "./test/"
all_image_paths = [join(image_directory, f) for f in os.listdir(image_directory) if isfile(join(image_directory, f))]
for image_path in all_image_paths:
    image_vector = get_image_vector(image_path)
    filename = os.path.basename(image_path)
    filename_without_extension = os.path.splitext(filename)[0]
    binary_key = ''.join(format(ord(char), '08b') for char in filename_without_extension)
    key = int(binary_key, 2)
    key = int(str(key)[0:17])
    image_data = images[images['photo_id'] == filename_without_extension]
    ai_data = keywords[keywords['photo_id'] == filename_without_extension]
    desc = image_data['photo_description'].iloc[0]
    url = image_data['photo_image_url'].iloc[0]
    ai_desc = image_data['ai_description'].iloc[0]
    keywords_list = ai_data["keyword"].tolist()
    client.upsert(
    collection_name="image_search",
    points=[
        PointStruct(
            id=key,
            vector=image_vector,
            payload={
                "url" : url,
                "ai_desc": ai_desc,
                "keywords": keywords_list
            }
        )
    ]
    )

In [6]:
query_vector = get_image_vector("/home/bored/Downloads/image-search-resnet/python/test/9R7voH34evo.jpg")
hits = client.search(
    collection_name="image_search",
    query_vector=query_vector,
    limit=5
)

In [7]:
for hit in hits:
    print(hit.payload.get("url"))
    print()

https://images.unsplash.com/photo-1459801625673-fb0937aab7d1
https://images.unsplash.com/photo-1585254904349-f7e1a1d6ce90
https://images.unsplash.com/photo-1580584916118-df1dfa213ed5
https://images.unsplash.com/photo-1444276233377-6a54153f8d5c
https://images.unsplash.com/photo-1579553574894-6852fad8f461


In [8]:
import numpy as np

text_query = ["buildings"]
text_tokens = clip.tokenize(text_query).to(device)
text_vector = model.encode_text(text_tokens)

# Convert the tensor to a list of floats
text_vector = text_vector.detach().cpu().numpy().tolist()

In [9]:
hits = client.search(
  collection_name="image_search",
  query_vector=text_vector[0],
  limit=1
)

In [10]:
for hit in hits:
    print(hit)

id=60689095620351518 version=182 score=0.25865692 payload={'ai_desc': 'grey building under grey clouds', 'keywords': ['tower', 'fountain', 'building', 'turbine', 'skyscraper', 'outdoors', 'house', 'sea', 'light', 'red sky', 'space', 'cliff', 'hotel', 'pollution', 'flag', 'globe', 'town', 'hut', 'smoke', 'winter', 'pyramid', 'astronomy', 'urban area', 'landscape', 'automobile', 'aircraft', 'night', 'clock tower', 'tornado', 'engine', 'gray', 'town square', 'office building', 'control tower', 'blimp', 'spire', 'freeway', 'car', 'planet', 'pattern', 'machine', 'nature', 'neighborhood', 'column', 'high rise', 'scenery', 'fog', 'sunset', 'monument', 'sphere', 'architecture', 'dusk', 'ornament', 'symbol', 'cylinder', 'moon', 'electrical device', 'triangle', 'wall', 'aerial view', 'corner', 'snow', 'steeple', 'horizon', 'ocean', 'countryside', 'antenna', 'skylight', 'water tower', 'tarmac', 'concrete', 'military', 'cloud', 'sea waves', 'transportation', 'metropolis', 'airship', 'cabin', 'mist

In [9]:
import base64

def image_to_base64(img_path):
   with open(img_path, "rb") as img_file:
       return base64.b64encode(img_file.read()).decode('utf-8')

# Usage
img_path = './test/YAb8F9M1Y3o.jpg'
base64_str = image_to_base64(img_path)
print(base64_str)

/9j/4AAQSkZJRgABAQEASABIAAD/4gJASUNDX1BST0ZJTEUAAQEAAAIwQURCRQIQAABtbnRyUkdCIFhZWiAH0AAIAAsAEwAzADthY3NwQVBQTAAAAABub25lAAAAAAAAAAAAAAAAAAAAAAAA9tYAAQAAAADTLUFEQkUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAApjcHJ0AAAA/AAAADJkZXNjAAABMAAAAGt3dHB0AAABnAAAABRia3B0AAABsAAAABRyVFJDAAABxAAAAA5nVFJDAAAB1AAAAA5iVFJDAAAB5AAAAA5yWFlaAAAB9AAAABRnWFlaAAACCAAAABRiWFlaAAACHAAAABR0ZXh0AAAAAENvcHlyaWdodCAyMDAwIEFkb2JlIFN5c3RlbXMgSW5jb3Jwb3JhdGVkAAAAZGVzYwAAAAAAAAARQWRvYmUgUkdCICgxOTk4KQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAWFlaIAAAAAAAAPNRAAEAAAABFsxYWVogAAAAAAAAAAAAAAAAAAAAAGN1cnYAAAAAAAAAAQIzAABjdXJ2AAAAAAAAAAECMwAAY3VydgAAAAAAAAABAjMAAFhZWiAAAAAAAACcGAAAT6UAAAT8WFlaIAAAAAAAADSNAACgLAAAD5VYWVogAAAAAAAAJjEAABAvAAC+nP/bAIQAAgICAwMDAwQEAwUFBQUFBwYGBgYHCgcIBwgHCg8KCwoKCwoPDhEODQ4RDhgTERETGBwYFxgcIh8fIispKzg4SwECAgIDAwMDBAQDBQUFBQUHBgYGBgcKBwgHCAcKDwoLCgoLCg8OEQ4NDhEOGBMRERMYHBgXGBwiHx8iKykrODhL/8IAEQgPABaAAwEiAAIRAQMR

In [8]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("Yova/SmallCap7M")
model = AutoModelForSeq2SeqLM.from_pretrained("Yova/SmallCap7M")
from PIL import Image
import requests
from io import BytesIO

def describe_image(image_path):
   # Load the image
   img = Image.open(image_path)

   # Convert the image to bytes
   img_bytes = BytesIO()
   img.save(img_bytes, format='JPEG')
   img_bytes = img_bytes.getvalue()

   # Send a POST request to the API endpoint
   response = requests.post('https://api-inference.huggingface.co/models/Yova/SmallCap7M', 
                           headers={'Authorization': 'Bearer YOUR_HUGGINGFACE_API_TOKEN'}, 
                           json={'inputs': img_bytes})

   # Parse the JSON response
   data = response.json()

   # Return the description of the image
   return data['generated_text']

# Test the function
description = describe_image('./test/YAb8F9M1Y3o.jpg')
print(description)

KeyError: 'smallcap'

In [11]:
keywords["keyword"].tolist()

['flower',
 'forest',
 'farm',
 'spring',
 'snow',
 'compass',
 'field',
 'bottle',
 'photographer',
 'jar',
 'reed',
 'blossom',
 'sweatshirt',
 'plant',
 'grove',
 'trademark',
 'corn',
 'ice',
 'hat',
 'tree',
 'cherry blossom',
 'geranium',
 'lawn',
 'plateau',
 'vegetation',
 'crocus',
 'clothing',
 'grassland',
 'soil',
 'petal',
 'cross',
 'nature',
 'fog',
 'woodland',
 'vegetable',
 'moss',
 'planter',
 'sport',
 'birch',
 'tulip',
 'building',
 'grass',
 'herbs',
 'fitness',
 'tree trunk',
 'fashion',
 'housing',
 'apparel',
 'bike',
 'transportation',
 'weather',
 'daisies',
 'walking',
 'vehicle',
 'vase',
 'beanie',
 'mist',
 'white',
 'paper',
 'robe',
 'office building',
 'iris',
 'meadow',
 'coat',
 'lupin',
 'landscape',
 'outdoors',
 'flag',
 'logo',
 'bicycle',
 'exercise',
 'countryside',
 'lightbulb',
 'park bench',
 'fruit',
 'fir',
 'furniture',
 'working out',
 'texture',
 'ball',
 'frost',
 'daisy',
 'hail',
 'symbol',
 'natural environment',
 'rural',
 'arenar