In [1]:
import openai, os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.decomposition import PCA
import pandas as pd

# Read the API key from the OPENAI_APIKEY environment variable (raises KeyError
# when the variable is unset) and set it on the openai module for later calls.
openai_key = os.environ["OPENAI_APIKEY"]
openai.api_key = openai_key

In [2]:
def get_emb(sent_inputs):
    """Request embeddings for the given input from the OpenAI API.

    Args:
        sent_inputs: Value forwarded unchanged as the ``input`` argument of
            ``openai.Embedding.create``.

    Returns:
        The response object returned by ``openai.Embedding.create`` for the
        "text-embedding-ada-002" model, unmodified.
    """
    return openai.Embedding.create(
        model="text-embedding-ada-002",
        input=sent_inputs,
    )

In [3]:
def plot_embs(df_in):
    """Build a scatter plot of 2-D projected embeddings, coloured by category.

    Args:
        df_in: Data handed to ``px.scatter``; it must provide the "PC1", "PC2",
            "category" and "sentence" columns referenced below.

    Returns:
        The figure created by ``px.scatter`` after the margin and marker-size
        updates have been applied.
    """
    scatter_fig = px.scatter(
        df_in,
        x="PC1",
        y="PC2",
        color="category",
        hover_data="sentence",
        template="ggplot2",
    )

    # Tight margins so the plot fills the output cell.
    tight_margin = {"l": 20, "r": 20, "b": 20, "t": 20, "pad": 4}
    scatter_fig.update_layout(margin=tight_margin)

    # Enlarged markers keep the handful of points easy to see and hover.
    scatter_fig.update_traces(marker_size=20)
    return scatter_fig

In [4]:
def plot_vectors(arr_in, sentences=None):
    """Project embeddings onto two principal components and plot them.

    Args:
        arr_in: Embedding matrix with one row per sentence.
        sentences: Sentences matching the rows of ``arr_in``, used as hover
            text. When omitted, the notebook-level ``sent_inputs`` variable is
            used, which keeps existing ``plot_vectors(arr)`` calls working.

    Returns:
        The figure produced by ``plot_embs``.

    Raises:
        ValueError: If the number of sentences differs from the number of rows
            in ``arr_in``.
    """
    if sentences is None:
        # Backward-compatible fallback to the notebook global; pass
        # `sentences` explicitly to avoid relying on hidden kernel state.
        sentences = sent_inputs

    n_rows = len(arr_in)
    if len(sentences) != n_rows:
        # Fail before fitting PCA, with a message that names both sizes.
        raise ValueError(
            f"Got {len(sentences)} sentences for {n_rows} embedding rows; "
            "they must have the same length."
        )

    # A fresh PCA is fitted on every call, so point positions can shift
    # whenever rows are added to arr_in.
    pca = PCA(n_components=2)
    embeddings_pca = pca.fit_transform(arr_in)

    df = pd.DataFrame(embeddings_pca, columns=["PC1", "PC2"])
    df["sentence"] = sentences

    # Labels follow the demo's row order: rows 0-4 are "cats", rows 5-9 are
    # "dogs" (.loc label slices are inclusive), everything after is "other".
    df["category"] = "other"
    df.loc[:4, "category"] = "cats"
    df.loc[5:9, "category"] = "dogs"
    return plot_embs(df)

In [5]:
def add_new_emb(sents_in, arr_in, sent_inputs_in):
    """Embed new sentences and append them to an existing embedding set.

    Args:
        sents_in: Sentences to embed. A single string is treated as a
            one-element list; any other iterable is converted to a list.
        arr_in: Existing embedding matrix with one row per sentence.
            Assumes its row width matches the returned embeddings; otherwise
            ``np.vstack`` raises ValueError.
        sent_inputs_in: Sentences corresponding to the rows of ``arr_in``.

    Returns:
        A ``(embeddings, sentences)`` tuple: ``arr_in`` with one row appended
        per new sentence, and a new list holding the old sentences followed by
        the new ones. The inputs are not modified.
    """
    # Normalise so list concatenation below works for str / tuple inputs too.
    if isinstance(sents_in, str):
        sents_in = [sents_in]
    else:
        sents_in = list(sents_in)

    if not sents_in:
        # Nothing to embed: skip the API round-trip entirely.
        return arr_in, list(sent_inputs_in)

    resp = get_emb(sents_in)
    new_rows = [d["embedding"] for d in resp["data"]]

    # Stack once instead of re-copying the whole matrix for every new row.
    arr_out = np.vstack([arr_in, *new_rows])
    return arr_out, list(sent_inputs_in) + sents_in

## Visual demo - vector embeddings

In [None]:
# Seed sentences for the demo. Order matters: plot_vectors labels rows 0-4 as
# "cats" and rows 5-9 as "dogs" purely by position.
sent_inputs = [
    # Cat-related sentences
    "The Bengal showed off its striking coat pattern.",
    "A lion's powerful roar echoed through the plains.",
    "A leopard's spots provided perfect camouflage in the dappled light.",
    "A cheetah's unmatched speed allowed it to outrun its prey.",
    "The Sphynx basked in the warmth of its owner's lap.",
    # Dog-related sentences
    "The golden retriever chased after the frisbee.",
    "The playful puppy rolled in the grass.",
    "A loyal companion is always by your side.",
    "The Labrador retriever enjoyed playing in the water.",
    "The family adopted a furry friend from the shelter.",
]

# Fetch the embeddings. The original assigned the sentence list itself to
# `resp`, so the `resp["data"]` lookup below raised TypeError.
resp = get_emb(sent_inputs)

# One row per sentence, in the order the response lists them.
arr = np.array([i["embedding"] for i in resp["data"]])

In [None]:
# Reduce the seed embeddings to two principal components and show the scatter plot.
fig = plot_vectors(arr)
fig.show()

In [None]:
# Embed one extra sentence and append it to both the embedding matrix and the
# sentence list. It lands at row 10, so plot_vectors labels it "other".
arr, sent_inputs = add_new_emb(
    ["Who doesn't love going to the Alps in the summer?"], 
    arr, 
    sent_inputs
)

In [None]:
# Re-plot with the added sentence. plot_vectors fits a new PCA on every call,
# so the earlier points may move relative to the previous figure.
fig = plot_vectors(arr)
fig.show()

In [None]:
# A second batch of sentences to add: cooking, astronomy and painting topics,
# plus one sentence that mentions a puppy.
new_sents = [
    "The chef prepared a delicious vegetable stir-fry for dinner.",
    "The astronomer gazed at the distant stars, searching for undiscovered galaxies.",
    "The young artist skillfully painted a beautiful landscape on the canvas.",
    "John is loving taking his new puppy out for walks in the morning."
]

In [None]:
# Embed the new batch, append it to the running matrix and sentence list, then
# re-plot. Every row past index 9 is labelled "other" by plot_vectors.
arr, sent_inputs = add_new_emb(new_sents, arr, sent_inputs)
fig = plot_vectors(arr)
fig.show()