# Embeddings

We need to choose the initial embedding model carefully!

Vectors produced by different embedding models are not comparable with each other, so if we switch embedding models in the future we would need to re-embed the entire set of documents.

In [10]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import CSVLoader
import os

# Pull the OpenAI API key from the environment (None when the variable is unset).
# NOTE(review): `key` is not referenced by the cells visible here.
key = os.environ.get("OPENAI_API_KEY")

In [3]:
# Pin the embedding model explicitly instead of relying on the library default:
# vectors from different models are not comparable, so a silent default change
# after a library upgrade would invalidate every previously stored embedding.
# NOTE(review): credentials are presumably picked up from OPENAI_API_KEY — confirm.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [5]:
# Display the name of the embedding model this client is configured with
embeddings.model

'text-embedding-ada-002'

## Embed a single string

In [4]:
# Sample sentence used as the input for the single-string embedding
text = "this is some normal text string that I want to embed as a vector"

In [6]:
# Embed one string; the result is a single vector (a list of floats)
embedded_text = embeddings.embed_query(text)

In [7]:
# Preview only the first 10 components instead of printing the whole vector
embedded_text[:10]

[-0.021364752528356994,
 0.0007997061014803541,
 -0.017900947753812197,
 0.00746103713467312,
 0.01028750293711291,
 0.009095952984532973,
 -0.01670939873255485,
 -0.006227922254955556,
 -0.02186354096723442,
 -0.014547983405849467]

In [8]:
# Number of dimensions in the embedding vector
len(embedded_text)

1536

## Embed a document

In [11]:
# Point a CSVLoader at the penguins dataset
loader = CSVLoader(file_path="some_data/penguins.csv")

# load() yields the list of documents whose page_content is embedded afterwards
data = loader.load()

In [12]:
# Embed the text of every loaded document; yields a list with one vector per document
embedded_docs = embeddings.embed_documents(
    [doc.page_content for doc in data]
)

In [13]:
len(embedded_docs)  # number of vectors, i.e. one per row loaded from the CSV

344

In [14]:
len(embedded_docs[0])  # dimensionality of the vector for a single row

1536