In [1]:
pip install chromadb langchain sentence-transformers huggingface_hub openai

Collecting chromadb
  Downloading chromadb-1.0.13-cp39-abi3-win_amd64.whl.metadata (7.1 kB)
Collecting langchain
  Downloading langchain-0.3.26-py3-none-any.whl.metadata (7.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Using cached build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.1-cp39-cp39-win_amd64.whl.metadata (8.7 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.34.3-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.19.2-cp39-cp39-win_amd64.whl.metadata (4.7 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.34.1-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_ot

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
googletrans 4.0.0rc1 requires httpx==0.13.3, but you have httpx 0.28.1 which is incompatible.

[notice] A new release of pip is available: 25.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text = """
Chroma is an open-source embedding database. It is used to store and query vector embeddings efficiently. 
You can use Chroma with any embedding model and LLM for building semantic search, RAG, and chatbots.
"""

# Split into manageable chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    separators=["\n\n", "\n", ".", " "]
)
chunks = text_splitter.split_text(text)
print(chunks)


['Chroma is an open-source embedding database', '. It is used to store and query vector embeddings efficiently.', 'You can use Chroma with any embedding model and LLM for building semantic search, RAG, and chatbots', '.']


In [3]:
from sentence_transformers import SentenceTransformer

# Use open-source embedding model
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = embed_model.encode(chunks)


  from .autonotebook import tqdm as notebook_tqdm





In [9]:
pip install --upgrade langchain langchain-community chromadb sentence-transformers transformers


Collecting langchain-community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting transformers
  Downloading transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Using cached dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting huggingface-hub>=0.20.0 (from sentence-transformers)
  Downloading huggingface_hub-0.33.0-py3-none-any.whl.metadata (14 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
 


[notice] A new release of pip is available: 25.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [10]:
# 📦 Required installations:
# pip install langchain chromadb sentence-transformers transformers


from langchain_community.document_loaders import TextLoader
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline

from transformers import pipeline
import os

# 1. Sample text data
text = """Microsoft Corporation is an American multinational technology company 
headquartered in Redmond, Washington. Microsoft raised $61 million in its initial public offering in 1986.
Pando is a logistics software startup that raised $30 million in its Series B funding led by Iron Pillar.
"""

# 2. Split into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
docs = text_splitter.create_documents([text])

# 3. Create embeddings (using sentence-transformers)
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# 4. Store in ChromaDB
persist_directory = 'db'
vectordb = Chroma.from_documents(documents=docs,
                                 embedding=embedding,
                                 persist_directory=persist_directory)
vectordb.persist()
vectordb = None

# 5. Reload ChromaDB
vectordb = Chroma(persist_directory=persist_directory,
                  embedding_function=embedding)
retriever = vectordb.as_retriever()

# 6. Load HuggingFace LLM (e.g., distilGPT2 or TinyLlama)
generator = pipeline("text-generation", model="distilgpt2", max_new_tokens=100)
llm = HuggingFacePipeline(pipeline=generator)

# 7. Build RAG QA Chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

# 8. Utility to print results and sources
def process_llm_response(llm_response):
    print("\nAnswer:\n", llm_response['result'])
    print('\nSources:')
    for source in llm_response["source_documents"]:
        print(source.page_content)

# 🔍 Query 1
query1 = "How much money did Microsoft raise?"
response1 = qa_chain(query1)
process_llm_response(response1)

# 🔍 Query 2
query2 = "What is the news about Pando?"
response2 = qa_chain(query2)
process_llm_response(response2)


  from .autonotebook import tqdm as notebook_tqdm
  embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")





  vectordb.persist()
  vectordb = Chroma(persist_directory=persist_directory,
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cpu
  llm = HuggingFacePipeline(pipeline=generator)
  response1 = qa_chain(query1)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Answer:
 Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

headquartered in Redmond, Washington. Microsoft raised $61 million in its initial public offering

Microsoft Corporation is an American multinational technology company

Pando is a logistics software startup that raised $30 million in its Series B funding led by Iron

offering in 1986.

Question: How much money did Microsoft raise?
Helpful Answer: $30 million (in the end)
Here is a quote from the Microsoft Research Center
"Microsoft's founders, John Redmond, and Joseph F. Smith, and Robert Woodruff, agreed that the company was the best company in the world in the world, and that they needed to make a profit to be able to go on a successful business."
Microsoft CEO Bill Gates also
"is one of the founders of the company, who came to be known as Microsoft, who led the way

Sources:
headquartered in Redmond, Washington