In [1]:
from langchain_classic.schema import Document

doc1 = Document(
    page_content="Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, he is known for his athletic fielding.",
    metadata={"team": "Chennai Super Kings"}
)

doc2 = Document(
    page_content="MS Dhoni is a legendary wicketkeeper-batsman and former captain of Chennai Super Kings, admired for his calm leadership and finishing ability.",
    metadata={"team": "Chennai Super Kings"}
)

doc3 = Document(
    page_content="Virat Kohli is a world-class batsman representing Royal Challengers Bangalore, famous for his consistency and aggressive batting style.",
    metadata={"team": "Royal Challengers Bangalore"}
)

doc4 = Document(
    page_content="Rohit Sharma is an elegant opening batsman and captain of Mumbai Indians, known for his timing and ability to score big centuries.",
    metadata={"team": "Mumbai Indians"}
)

doc5 = Document(
    page_content="Jasprit Bumrah is a fast bowler for Mumbai Indians, recognized for his unique bowling action and deadly yorkers.",
    metadata={"team": "Mumbai Indians"}
)

documents = [doc1, doc2, doc3, doc4, doc5]

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_chroma import Chroma
from dotenv import load_dotenv
load_dotenv()

embeddings = GoogleGenerativeAIEmbeddings(
    model='gemini-embedding-001'
)

In [3]:
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory='chroma_db',
    collection_name='IPL_Sample'
)

In [4]:
vector_store.add_documents(documents=documents)

['e36b3994-2b40-4118-ab4a-c03ba1e33acd',
 '24dd5de4-c247-41f3-95e2-ed6b1a7f25bf',
 '4d05e33f-1071-4b83-9187-95acd1ca8fa8',
 'ff333b21-bb84-4b49-8ea2-6b4f5b6e0403',
 '7b6ea9e6-a561-4b33-9a53-92e7b0587d82']

In [5]:
vector_store.get(include=['embeddings', 'documents', 'metadatas'])

{'ids': ['e36b3994-2b40-4118-ab4a-c03ba1e33acd',
  '24dd5de4-c247-41f3-95e2-ed6b1a7f25bf',
  '4d05e33f-1071-4b83-9187-95acd1ca8fa8',
  'ff333b21-bb84-4b49-8ea2-6b4f5b6e0403',
  '7b6ea9e6-a561-4b33-9a53-92e7b0587d82'],
 'embeddings': array([[-0.00847024, -0.02889338,  0.01362535, ...,  0.0075209 ,
         -0.01370478, -0.00378271],
        [-0.01292228, -0.01574806,  0.00057987, ...,  0.01188228,
         -0.01931828, -0.00334065],
        [-0.01209571,  0.0097548 ,  0.0141743 , ...,  0.02465927,
         -0.01216253, -0.00516963],
        [-0.02888269,  0.00097366,  0.00622522, ...,  0.00999573,
         -0.02119176, -0.01304592],
        [-0.01431694,  0.00138197,  0.00084822, ..., -0.00106886,
          0.00904597, -0.01857016]], shape=(5, 3072)),
 'documents': ['Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, he is known for his athletic fielding.',
  'MS Dhoni is a legendary wicketkeeper-batsman and former captain 

In [8]:
vector_store.similarity_search(
    query='who are the bowler?',
    k=2
)

[Document(id='7b6ea9e6-a561-4b33-9a53-92e7b0587d82', metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is a fast bowler for Mumbai Indians, recognized for his unique bowling action and deadly yorkers.'),
 Document(id='e36b3994-2b40-4118-ab4a-c03ba1e33acd', metadata={'team': 'Chennai Super Kings'}, page_content='Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, he is known for his athletic fielding.')]

In [10]:
vector_store.similarity_search_with_score(
    query='who is the fast bowler in Mumbai Indians?',
    k=2
)

[(Document(id='7b6ea9e6-a561-4b33-9a53-92e7b0587d82', metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is a fast bowler for Mumbai Indians, recognized for his unique bowling action and deadly yorkers.'),
  0.40060752630233765),
 (Document(id='ff333b21-bb84-4b49-8ea2-6b4f5b6e0403', metadata={'team': 'Mumbai Indians'}, page_content='Rohit Sharma is an elegant opening batsman and captain of Mumbai Indians, known for his timing and ability to score big centuries.'),
  0.6540603041648865)]

In [17]:
results = vector_store.similarity_search_with_score(
    query="Best IPL team performance in playoffs",
    filter={"team": "Chennai Super Kings"}
)
print(results)

[(Document(id='24dd5de4-c247-41f3-95e2-ed6b1a7f25bf', metadata={'team': 'Chennai Super Kings'}, page_content='MS Dhoni is a legendary wicketkeeper-batsman and former captain of Chennai Super Kings, admired for his calm leadership and finishing ability.'), 0.721157431602478), (Document(id='e36b3994-2b40-4118-ab4a-c03ba1e33acd', metadata={'team': 'Chennai Super Kings'}, page_content='Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, he is known for his athletic fielding.'), 0.7707984447479248)]
