In [1]:
# Embedding client used throughout this notebook.
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
# Load settings from a .env file before any client is created. This call is
# the cell's last expression, so its return value is what the cell displays.
# NOTE(review): presumably the .env file supplies the Google API key — confirm.
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

## Create Gemini Embeddings

In [2]:
# Model identifier kept in one place so it is easy to spot and swap.
EMBEDDING_MODEL = "gemini-embedding-001"

# Shared embedding client; every later cell calls into this object.
# NOTE(review): presumably authenticates via the environment populated by
# load_dotenv() in the first cell — confirm.
embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)

### Embed a Single Query

In [3]:
# Embed one query string with the shared client.
vector = embeddings.embed_query("What is LangChain?")

# The length of the returned vector is the embedding dimension.
embedding_dim = len(vector)
print(embedding_dim)

3072


### Embed Multiple Documents

In [4]:
# Three short sample sentences to embed in a single call.
texts = [
    "LangChain is a framework for LLM applications.",
    "FastAPI is used for building APIs.",
    "PyTorch is a deep learning framework.",
]

# Batch-embed the sample sentences.
vectors = embeddings.embed_documents(texts)

# First the number of vectors returned, then the size of one vector.
print(len(vectors))
first_vector = vectors[0]
print(len(first_vector))

3
3072


### Use with Vector Stores (Example: FAISS)

In [5]:
# NOTE(review): this whole cell is disabled — every line below is commented
# out, so running it does nothing. The example reuses `texts` and `embeddings`
# from earlier cells. Presumably it also needs `langchain-community` and a
# FAISS package installed — confirm before uncommenting.
#
# Intended flow: build a FAISS index from `texts`, run a top-2 similarity
# search for the query, and print the text of each hit.

# from langchain_community.vectorstores import FAISS

# db = FAISS.from_texts(texts, embeddings)

# query = "What is LangChain used for?"
# docs = db.similarity_search(query, k=2)

# for d in docs:
#     print(d.page_content)

## Cosine Similarity Example

### Documents & Query

In [6]:
# Question to rank the corpus against.
query = "Who is a famous batsman in cricket?"

# Small corpus for the similarity demo. The order matters: later cells label
# these D1..D4 by position.
documents = [
    "Cricket is a popular sport played between two teams.",
    "Virat Kohli is one of the greatest batsmen in cricket.",
    "Test cricket is played over five days.",
    "Football is different from cricket.",
]

### Generate Embeddings

In [10]:
# Embed the question on its own ...
query_vector = embeddings.embed_query(query)

# ... and the whole corpus in one batch call.
doc_vectors = embeddings.embed_documents(documents)

### Cosine Similarity

In [12]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Convert the embeddings to arrays and reshape the single query embedding
# into a one-row matrix so it can be compared against the document rows.
doc_vectors = np.array(doc_vectors)
query_vector = np.array(query_vector)
query_vector = query_vector.reshape(1, -1)

# The query is the only row passed in, so row 0 of the result holds its
# similarity to each document.
similarity_matrix = cosine_similarity(query_vector, doc_vectors)
scores = similarity_matrix[0]

# Report the scores with 1-based document labels (D1, D2, ...).
for i, score in enumerate(scores):
    print(f"Similarity with D{i+1}: {score:.4f}")

Similarity with D1: 0.6678
Similarity with D2: 0.7286
Similarity with D3: 0.6230
Similarity with D4: 0.6195


### Most Relevant Document

In [14]:
# Position of the highest similarity score; it doubles as the index into
# `documents`, since scores were computed in document order.
best_match = scores.argmax()

print("Most relevant document:")
most_relevant = documents[best_match]
print(most_relevant)

Most relevant document:
Virat Kohli is one of the greatest batsmen in cricket.
