## Building MultiModal Search with Vector Databases 

### Dependencies

    1. The Weaviate Python Client

In [None]:
! pip install -U "weaviate-client==4.5.4"

## Connect to Weaviate

In [None]:
# Sets the PALM_API_KEY environment variable for this kernel.
# The value below is a placeholder: replace it with your own token before running,
# and avoid saving or sharing the notebook with a real token in this cell.
%env PALM_API_KEY=ya29.here...

In [None]:
import weaviate, os

# Gather the connection settings from environment variables
wcs_cluster_url = os.getenv("WORKSHOP_DEMO_URL")
wcs_admin_key = weaviate.auth.AuthApiKey(os.getenv("WORKSHOP_DEMO_KEY_ADMIN"))
palm_headers = {"X-PALM-Api-Key": os.getenv("PALM_API_KEY")}

# Connect to a cloud instance of Weaviate (with WCS)
client = weaviate.connect_to_wcs(
    cluster_url=wcs_cluster_url,
    auth_credentials=wcs_admin_key,
    headers=palm_headers,
)

# Last expression of the cell, so its result is shown as the cell output
client.is_ready()

In [None]:
# Request metadata from the connected instance; as the cell's last expression the result is displayed
client.get_meta()

## Create the `Animals` Collection

In [None]:
from weaviate.classes.config import Configure

# Start from a clean slate: remove the "Animals" collection if it already exists
if client.collections.exists("Animals"):
    client.collections.delete("Animals")

# Multimodal vectorizer settings: which properties hold images and videos, and which Google model to use
animals_vectorizer = Configure.Vectorizer.multi2vec_palm(
    image_fields=["image"],
    video_fields=["video"],
    project_id="semi-random-dev",        # replace this with your Google project enabled to use multimodalembedding@001
    location="us-central1",
    model_id="multimodalembedding@001",  # The multimodal embedding model
    dimensions=1408,  # default: 1408; available settings: 128, 256, 512, 1408 - video embeddings require 1408
)

# Last expression of the cell, so the created collection is shown as the cell output
client.collections.create(
    name="Animals",
    vectorizer_config=animals_vectorizer,
)

In [None]:
import base64

def toBase64(path):
    """Read the file at `path` in binary mode and return its contents as a base64 string.

    Args:
        path: Location of the file to read.

    Returns:
        The base64 encoding of the file's bytes, decoded to a `str` using UTF-8.
    """
    with open(path, 'rb') as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')


## Insert Images into Weaviate

> If you get timeout errors, reduce the `requests_per_minute` value (currently `100`) in
> `animals.batch.rate_limit(requests_per_minute=100)`

In [None]:
animals = client.collections.get("Animals")

# Every entry in the image folder becomes one object in the collection
image_dir = "./source/image/"
source = os.listdir(image_dir)

# Batch import, configured with requests_per_minute=100
with animals.batch.rate_limit(requests_per_minute=100) as batch:
    for name in source:
        print(f"Adding {name}")

        path = image_dir + name
        image_object = {
            "name": name,             # name of the file
            "path": path,             # path to the file to display result
            "image": toBase64(path),  # this gets vectorized - "image" was configured in vectorizer_config as the property holding images
            "mediaType": "image",     # a label telling us how to display the resource
        }
        batch.add_object(image_object)


In [None]:
# Report the objects recorded in animals.batch.failed_objects, if any
failed_objects = animals.batch.failed_objects

if len(failed_objects) == 0:
    print("No errors")
else:
    print(f"Failed to import {len(failed_objects)} objects")
    for failed in failed_objects:
        print(f"e.g. Failed to import object with error: {failed.message}")

## Check count
> Total count should be 9 (9x image)

In [None]:
# Object count
# Fetch the "Animals" collection handle
animals = client.collections.get("Animals")
# Aggregate over the whole collection; as the cell's last expression the result is displayed
animals.aggregate.over_all()

## Insert Video Files into Weaviate
> Note: the input video must be at least 4 seconds long.

In [None]:
animals = client.collections.get("Animals")

# Every entry in the video folder becomes one object in the collection
video_dir = "./source/video/"
source = os.listdir(video_dir)

for name in source:
    print(f"Adding {name}")
    path = video_dir + name

    video_object = {
        "name": name,
        "path": path,
        "video": toBase64(path),
        "mediaType": "video"
    }

    # insert videos one by one
    animals.data.insert(video_object)

## Check count
> Total count should be 15 (9x image + 6x video)

In [None]:
# Aggregate over the whole collection; as the cell's last expression the result is displayed
animals.aggregate.over_all()

In [None]:
# Aggregate the collection, grouped by the "mediaType" property
agg = animals.aggregate.over_all(group_by="mediaType")

# Print one entry per group
for group in agg.groups:
    print(group)


## Check all the media files added to the Vector Database

In [None]:
# Properties to return for each object in the listing
listed_properties = ["name", "mediaType"]

itr = animals.iterator(
    return_properties=listed_properties,
    # include_vector=True, # in case you want to see the vectors
)

# Print the returned properties of every object in the collection
for item in itr:
    print(item.properties)

In [None]:
# Close the client now that the notebook is done with it
client.close()