## Vector embeddings & similarity: visualized

![What is a vector](images/weaviate-explainer-vector.png)

In [1]:
import numpy as np
import plotly.express as px
from sklearn.decomposition import PCA
import pandas as pd
import boto3
import json

Helper function to generate embeddings with Bedrock

In [2]:
from numpy import ndarray

def get_embeddings(
    sent_inputs: list[str],
    model_id: str = 'amazon.titan-embed-text-v2:0',
    region_name: str = 'us-west-2',
) -> ndarray:
    """Embed each sentence with a Bedrock text-embedding model.

    One ``invoke_model`` request is issued per sentence, and the
    ``embedding`` entry of every JSON response body is collected.

    Args:
        sent_inputs: Sentences to embed.
        model_id: Model identifier passed to ``invoke_model`` as ``modelId``.
        region_name: AWS region used for the ``bedrock-runtime`` client.

    Returns:
        Array built from the collected embeddings, one entry per input
        sentence in input order. An empty input returns ``np.array([])``
        without creating a client.

    Raises:
        ValueError: If a response body has no ``embedding`` entry.
    """
    # Nothing to embed: skip client creation (and the credential lookup it
    # implies) and return the same empty array the loop would have produced.
    if len(sent_inputs) == 0:
        return np.array([])

    bedrock_runtime = boto3.client(
        service_name='bedrock-runtime',
        region_name=region_name,
    )

    embeddings = []
    for sent in sent_inputs:
        response = bedrock_runtime.invoke_model(
            body=json.dumps({"inputText": sent}),
            modelId=model_id,
            accept='application/json',
            contentType='application/json',
        )

        response_body = json.loads(response['body'].read())
        embedding = response_body.get('embedding')
        if embedding is None:
            # Fail here rather than letting a None slip into the array, which
            # would silently turn the result into an object-dtype array.
            raise ValueError(
                f"Bedrock response for {sent!r} contains no 'embedding' field"
            )
        embeddings.append(embedding)

    return np.array(embeddings)

Let's embed these sentences

In [3]:
# Order matters: plot_vectors labels rows by position (0-4 cats, 5-9 dogs),
# so the cat sentences must come first.
sent_inputs = [
    "The Bengal showed off its striking coat pattern.",
    "A lion's powerful roar echoed through the plains.",
    "A leopard's spots provided perfect camouflage in the dappled light.",
    "A cheetah's unmatched speed allowed it to outrun its prey.",
    "The Sphynx basked in the warmth of its owner's lap.",
]

# ...followed by the five dog-related sentences.
sent_inputs.extend([
    "The golden retriever chased after the frisbee.",
    "The playful puppy rolled in the grass.",
    "A loyal companion is always by your side.",
    "The Labrador retriever enjoyed playing in the water.",
    "The family adopted a furry friend from the shelter.",
])

# One embedding per sentence, in the same order as sent_inputs.
emb_array = get_embeddings(sent_inputs)

![How embeddings are created](images/weaviate-explainer-embedding-creation.png)

In [4]:
# Expected shape: (10, embedding_dimension), i.e. one row per sentence.
print(emb_array.shape)

# Preview the leading 5 dimensions of (at most) the first 3 embeddings.
for row_idx in range(min(3, len(emb_array))):
    print(emb_array[row_idx][:5])

(10, 1024)
[-0.05483039  0.02138026 -0.0146731  -0.03620926 -0.08938742]
[-0.05266266  0.06918281  0.01735426  0.01537651  0.02480765]
[-0.0571584   0.02600997  0.07072666 -0.02121881  0.02741901]


## Visualize the embeddings

Helper functions to reduce the embeddings to two dimensions (with PCA) and plot them

In [5]:
from pandas import DataFrame

def plot_2d_embeddings(df_in: DataFrame):
    """Scatter-plot 2-D embedding coordinates, coloured by category.

    Args:
        df_in: Frame providing the columns ``PC1`` and ``PC2`` (x/y axes),
            ``category`` (marker colour) and ``sentence`` (hover text).

    Returns:
        The figure created by ``px.scatter`` after the layout and marker
        adjustments below.
    """
    fig = px.scatter(
        df_in,
        template="ggplot2",
        x="PC1",
        y="PC2",
        color="category",
        # Pass column names as a list: that form is documented for hover_data,
        # whereas a bare string may not be accepted by older plotly.express
        # releases (NOTE(review): confirm against the pinned plotly version).
        hover_data=["sentence"],
    )
    # Tight margins plus large markers keep the handful of points readable.
    fig.update_layout(margin=dict(l=20, r=20, b=20, t=20, pad=4))
    fig.update_traces(marker_size=20)
    return fig


def plot_vectors(arr_in: ndarray, text_inputs: list, categories=None):
    """Project embeddings onto two PCA components and scatter-plot them.

    Args:
        arr_in: Array of embeddings, one row per sentence.
        text_inputs: Sentences matching the rows of ``arr_in``; stored in the
            ``sentence`` column of the plotted frame.
        categories: Optional per-row labels used to colour the points. When
            omitted, rows are labelled by position: rows 0-4 as ``"cats"``,
            rows 5-9 as ``"dogs"`` and any further rows as ``"other"``.

    Returns:
        The figure produced by ``plot_2d_embeddings``.

    Raises:
        ValueError: If ``categories`` is given and its length differs from
            the number of embedding rows.
    """
    pca = PCA(n_components=2)
    embeddings_pca = pca.fit_transform(arr_in)

    df = pd.DataFrame(embeddings_pca, columns=["PC1", "PC2"])
    df["sentence"] = text_inputs

    if categories is None:
        # Positional default matching the notebook's initial ten sentences.
        # .loc slices by label and is end-inclusive, so ":4" covers rows 0-4.
        df["category"] = "other"
        df.loc[:4, "category"] = "cats"
        df.loc[5:9, "category"] = "dogs"
    else:
        labels = list(categories)
        if len(labels) != len(df):
            raise ValueError(
                f"categories has {len(labels)} entries but there are "
                f"{len(df)} embeddings"
            )
        df["category"] = labels

    return plot_2d_embeddings(df)

What do they look like?

In [6]:
# Reduce the embeddings to 2-D and render the interactive scatter plot.
fig = plot_vectors(arr_in=emb_array, text_inputs=sent_inputs)
fig.show()

![How embeddings are compared](images/weaviate-explainer-embedding-comparison.png)

### Try adding more data

In [7]:
def add_embeddings_to_array(new_sents: list[str], old_embeddings: ndarray, old_sents: list[str]) -> tuple[ndarray, list[str]]:
    """Embed new sentences and append them to an existing embedding set.

    Args:
        new_sents: Sentences to embed via ``get_embeddings`` and append.
        old_embeddings: Existing embeddings, one row per entry of ``old_sents``.
        old_sents: Sentences that ``old_embeddings`` was computed from.

    Returns:
        A tuple ``(embeddings, sentences)``: the old rows followed by the new
        ones, and ``old_sents + new_sents``. Neither input is modified; when
        no new embeddings come back, ``old_embeddings`` is returned as-is.
    """
    resp = get_embeddings(new_sents)

    if len(resp) == 0:
        # Nothing to append; stacking an empty array onto the existing rows
        # would fail on the column-count mismatch.
        new_embeddings = old_embeddings
    else:
        # A single stack instead of one vstack per row, which would copy the
        # whole growing array for every added embedding.
        new_embeddings = np.vstack([old_embeddings, resp])

    return new_embeddings, old_sents + new_sents

In [8]:
# Append one sentence unrelated to cats or dogs.
# Note: this rebinds emb_array / sent_inputs from their own previous values,
# so re-running the cell appends the sentence again.
emb_array, sent_inputs = add_embeddings_to_array(
    new_sents=["Who doesn't love going to the Alps in the summer?"],
    old_embeddings=emb_array,
    old_sents=sent_inputs,
)

In [9]:
# Re-plot with the extra sentence included; PCA is refitted on all rows.
fig = plot_vectors(arr_in=emb_array, text_inputs=sent_inputs)
fig.show()

Even more sentences

In [10]:
# Three sentences on topics unrelated to cats or dogs...
new_sents = [
    "The chef prepared a delicious vegetable stir-fry for dinner.",
    "The astronomer gazed at the distant stars, searching for undiscovered galaxies.",
    "The young artist skillfully painted a beautiful landscape on the canvas.",
]
# ...plus one more sentence that mentions a puppy.
new_sents.append("John is loving taking his new puppy out for walks in the morning.")

In [11]:
# Embed the additional sentences, append them to the running set, and re-plot.
# Re-running this cell appends the same sentences again.
emb_array, sent_inputs = add_embeddings_to_array(
    new_sents=new_sents,
    old_embeddings=emb_array,
    old_sents=sent_inputs,
)
fig = plot_vectors(arr_in=emb_array, text_inputs=sent_inputs)
fig.show()

![Vector search](images/weaviate-explainer-embedding-vector-search.png)