In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableMap

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
## Step 1: load the dataset and split it into overlapping chunks
loader = TextLoader(file_path="langchain_crewai_dataset.txt")
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=300,
)

# Read the file first, then hand the loaded documents to the splitter.
raw_docs = loader.load()
chunks = splitter.split_documents(raw_docs)

In [3]:
### Step 2: embed the chunks and index them in a FAISS vector store
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(
    documents=chunks,
    embedding=embedding_model,
)

## Step 3: retriever using the "mmr" search type, returning k=5 documents
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=5),
)
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000017D16EAF920>, search_type='mmr', search_kwargs={'k': 5})

In [4]:
## Step 4: LLM and prompt
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail early with a readable message when the key is missing. Assigning
# os.getenv(...) back into os.environ would raise an opaque TypeError for None
# and is a no-op when the key is already present.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; export it or add it to your .env file."
    )

llm = init_chat_model("openai:o4-mini")
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000017D175206B0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000017D1777FFB0>, root_client=<openai.OpenAI object at 0x0000017D16EAFAD0>, root_async_client=<openai.AsyncOpenAI object at 0x0000017D175777D0>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))

In [5]:
# Query expansion: have the LLM rewrite the user's query before retrieval.
# The template has a single placeholder, {query}.
query_expansion_prompt = PromptTemplate.from_template(
    template="""
    You are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms, and useful context.
    Original query: "{query}"
    Expanded query:
    """
)

# prompt -> chat model -> plain string
query_expansion_chain = (
    query_expansion_prompt
    | llm
    | StrOutputParser()
)
query_expansion_chain

PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='\n    You are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms, and useful context.\n    Original query: "{query}"\n    Expanded query:\n    ')
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000017D175206B0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000017D1777FFB0>, root_client=<openai.OpenAI object at 0x0000017D16EAFAD0>, root_async_client=<openai.AsyncOpenAI object at 0x0000017D175777D0>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser()

In [6]:
# Try the expansion chain on a short sample query and display the result.
sample_expansion_input = {"query": "Langchain memory"}
query_expansion_chain.invoke(sample_expansion_input)

'Expanded query:  \n“LangChain memory” OR “LangChain memory management” OR “LangChain conversational memory” OR “LangChain session memory” OR “LangChain context persistence” OR “LangChain state management” OR “LangChain long-term memory” OR “LangChain short-term memory” OR “LangChain buffer memory” OR “ConversationBufferMemory” OR “ChatMessageHistory” OR “MemoryChain” OR “RetrievalAugmentedMemory” OR “vector store memory” OR “embedding-based memory” OR “AI agent memory” OR “prompt context storage” OR “multi-turn dialogue memory” OR “contextual memory management in LangChain (Python)” OR “LangChain memory integration with Redis/Pinecone/Chroma” OR “LangChain memory optimization”'

In [9]:
# RAG answering prompt.
# {context} is filled with the retrieved documents and {input} with the user's
# question. The stray opening quote that preceded {context} was never closed
# and has been removed so the model sees clean context text.
answer_prompt = PromptTemplate.from_template("""
    Answer the question based on the context below.

    Context:
    {context}

    Question: {input}
    """)

# Chain that stuffs the documents into the prompt above and calls the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=answer_prompt)

In [11]:
# Step 5: Full RAG pipeline with query expansion
def _question_text(payload):
    """Return the user's question from the pipeline input unchanged."""
    return payload["input"]


def _retrieve_for_expanded_question(payload):
    """Expand the question with the LLM, then retrieve documents for the expanded text."""
    expanded_question = query_expansion_chain.invoke({"query": payload["input"]})
    return retriever.invoke(expanded_question)


# Build the {"input", "context"} mapping, then pass it to the answering chain.
rag_pipeline = (
    RunnableMap({
        "input": _question_text,
        "context": _retrieve_for_expanded_question,
    })
    | document_chain
)

In [12]:
# Step 5: Full RAG pipeline with query expansion
# NOTE(review): this cell builds the same rag_pipeline as the previous cell;
# one of the two definitions can be removed.
rag_inputs = RunnableMap({
    # Forward the original question untouched.
    "input": lambda x: x["input"],
    # Expand the question first, then retrieve with the expanded text.
    "context": lambda x: retriever.invoke(
        query_expansion_chain.invoke({"query": x["input"]})
    ),
})
rag_pipeline = rag_inputs | document_chain

In [13]:
# Step 6: Run query
query = {"input": "What types of memory does LangChain support?"}

# Show the expanded query. Pass the question string itself (not the whole
# dict) so the prompt's {query} slot receives plain text.
print(query_expansion_chain.invoke({"query": query["input"]}))

response = rag_pipeline.invoke(query)
# Single backslash: emit a real line break instead of the two characters "\n".
print("✅ Answer:\n", response)

Expanded query:  
“LangChain memory support types” OR “LangChain memory modules” OR “LangChain memory API”  
AND (conversation memory OR chat‐history persistence OR context management OR session state OR memory augmentation)  
AND (BufferMemory OR ConversationBufferMemory OR ConversationSummaryMemory OR TokenBufferMemory OR ChatMessageHistory)  
AND (in-memory OR RedisMemory OR SQLMemory OR file-based OR browser-storage memory backends)  
AND (vector store integration OR Pinecone OR FAISS OR Chroma OR Weaviate)  
AND (memory retriever OR memory management OR state persistence OR context summarization)  
AND (LangChain Python SDK OR LangChain JavaScript SDK)
✅ Answer:\n LangChain supports at least two built-in memory modules out of the box:  
- ConversationBufferMemory – keeps the full recent exchange history in memory  
- ConversationSummaryMemory – compresses long chats into a concise summary to stay within token limits


In [14]:
# Step 6: Run query
query = {"input": "CrewAI agents?"}

# Show the expanded query. Pass the question string itself (not the whole
# dict) so the prompt's {query} slot receives plain text.
print(query_expansion_chain.invoke({"query": query["input"]}))

response = rag_pipeline.invoke(query)
# Single backslash: emit a real line break instead of the two characters "\n".
print("✅ Answer:\n", response)

Expanded query:

“CrewAI agents” OR “Crew AI autonomous agents” OR “CrewAI multi-agent system” OR “CrewAI platform” OR “AI agent orchestration” OR “CrewAI API” OR “CrewAI SDK” OR “AI-powered digital assistants” OR “autonomous workflow automation” OR “enterprise AI agent framework” OR “digital workforce” OR “AI-driven virtual collaborators” OR “CrewAI use cases” OR “CrewAI technical documentation”
✅ Answer:\n CrewAI agents are autonomous, semi-independent “workers” in a multi-agent system, each defined by:  
• A clear purpose and goal  
• A designated role (e.g. researcher, planner, executor)  
• A set of tools and memory they may use  

Key characteristics:  
1. Structured Collaboration – Agents form a workflow, handing off tasks and results according to their roles.  
2. Configurable via YAML/JSON – Developers specify each agent’s role, goals, memory, and tools in a simple declarative config.  
3. Autonomous Orchestration – CrewAI manages the agent loop, turn-taking, tool invocation, 