In [None]:
import os
import torch
from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig, AutoModelForCausalLM

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.agents import Tool

# ======== 1. Load embeddings model and vector store ========
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# NOTE(security): allow_dangerous_deserialization=True opts in to loading
# pickled data from the index directory. Only point this at an index that was
# built locally or comes from a source you trust.
vectorstore = FAISS.load_local("qa_index_cleaned", embedding_model, allow_dangerous_deserialization=True)
# k=6: number of documents the retriever returns per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 6})

# ======== 2. Load local Qwen2-1.5B quantized model (4bit) ========
model_name = "unsloth/qwen2-1.5b-bnb-4bit"
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto"
)

# Greedy decoding (do_sample=False). `temperature` is intentionally NOT passed:
# it is a sampling-only parameter, so with do_sample=False it has no effect on
# the output and only makes transformers complain about a contradictory
# generation config.
generate_text = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_new_tokens=256,
    do_sample=False,
    repetition_penalty=1.1
)

# Expose the transformers pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=generate_text)

# ======== 3. Build RAG QA tool ========
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=False
)

# The agent sees the QA chain as a single tool that maps a question string to
# an answer string.
rag_tool = Tool(
    name="rag_qa_tool",
    func=qa_chain.run,
    description="Use this tool only to answer questions based on the internal knowledge base. It does NOT have functions like 'learn' or others."
)

# ======== 4. Build LangGraph Agent ========
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import create_react_agent

from langchain_core.runnables import Runnable
from langchain_core.agents import AgentAction, AgentFinish
from typing import TypedDict, List, Union
from langchain.agents import initialize_agent

from langchain_core.runnables import RunnableLambda

# Wrap as LangGraph-compatible runnable
def simple_react_agent_runnable(state):
    """Run the LangChain agent on ``state["input"]`` and return a new state dict.

    The returned dict carries the same ``input``, the incoming ``steps``
    (empty list when the key is absent) and the agent's answer under ``final``.
    """
    question = state["input"]
    prior_steps = state.get("steps", [])
    answer = agent_chain.run(question)
    return {
        "input": question,
        "steps": prior_steps,
        "final": answer,
    }

class ToolExecutor:
    """Dispatch an input to one of the registered tools, looked up by name.

    A tool is any object exposing a ``name`` attribute and a callable ``func``
    attribute.
    """

    # Tool used when ``invoke`` is called without ``tool_name``. This is the
    # name the single-tool setup previously hard-coded, so existing
    # ``invoke(tool_input)`` callers behave exactly as before.
    DEFAULT_TOOL_NAME = "rag_qa_tool"

    def __init__(self, tools):
        """Index ``tools`` by their ``name`` so they can be invoked by name."""
        self.tool_map = {tool.name: tool.func for tool in tools}

    def invoke(self, tool_input, tool_name=None):
        """Call a registered tool with ``tool_input`` and return its result.

        Args:
            tool_input: Value passed unchanged to the tool's callable.
            tool_name: Name of the tool to run. Defaults to
                ``DEFAULT_TOOL_NAME`` when omitted.

        Raises:
            KeyError: If no tool with the requested name is registered.
        """
        name = self.DEFAULT_TOOL_NAME if tool_name is None else tool_name
        try:
            tool_func = self.tool_map[name]
        except KeyError:
            # Same exception type as the bare dict lookup, but say what is available.
            available = ", ".join(sorted(self.tool_map)) or "<none>"
            raise KeyError(f"Unknown tool {name!r}; registered tools: {available}") from None
        return tool_func(tool_input)

# Define agent state structure
class AgentState(TypedDict):
    """State dictionary handed from node to node in the LangGraph graph."""
    # The user's question; set once by run_langgraph_agent and copied through by every node.
    input: str
    # (action, observation) pairs by type. Nothing in this file appends to it:
    # it starts as [] and the nodes pass it through unchanged.
    steps: List[tuple[AgentAction, str]]
    # None in the initial state; a node stores its answer string here.
    final: Union[str, None]

# The agent gets exactly one tool: the RAG question-answering chain.
tools = [rag_tool]
# Direct name -> callable dispatcher over the same tools; used by call_tool.
tool_executor = ToolExecutor(tools)

# LangChain agent built on the local LLM with the "zero-shot-react-description"
# agent type. It is referenced at call time by simple_react_agent_runnable.
# NOTE(review): handle_parsing_errors=True presumably lets the executor recover
# from malformed LLM output instead of raising — confirm against LangChain docs.
agent_chain = initialize_agent(
    tools=tools,
    llm=llm,
    agent="zero-shot-react-description",
    verbose=True,
    handle_parsing_errors=True
)

# Node function: call agent
def call_agent(state: AgentState) -> AgentState:
    """Graph node "agent": delegate to ``simple_react_agent_runnable``."""
    next_state = simple_react_agent_runnable(state)
    return next_state

# Node function: call tool
def call_tool(state: AgentState) -> AgentState:
    """Graph node "tools": run the tool executor on the raw user question.

    The tool receives ``state["input"]`` itself (not an agent-chosen tool
    input); its result is stored under ``final``.
    """
    tool_answer = tool_executor.invoke(state["input"])
    carried_steps = state.get("steps", [])
    return {
        "input": state["input"],
        "steps": carried_steps,
        "final": tool_answer,
    }

# Control flow: decide whether to continue reasoning
def should_continue(state: AgentState) -> str:
    """Router after the "agent" node: "tools" while no answer exists, else END."""
    if state["final"] is None:
        return "tools"
    return END

# Build LangGraph graph structure:
#   entry -> "agent" -> should_continue -> "tools" -> "agent" -> ... or END
graph = StateGraph(AgentState)
graph.add_node("agent", call_agent)
graph.add_node("tools", call_tool)
graph.set_entry_point("agent")
# After "agent" runs, should_continue returns either "tools" or END; the dict
# maps each of those return values to the node to visit next.
graph.add_conditional_edges("agent", should_continue, {
    "tools": "tools",
    END: END
})
# The tool node always hands control back to the agent node.
graph.add_edge("tools", "agent")

# Compile graph. compile() is called without arguments, so no checkpointer
# (memory/caching) is configured here.
runnable = graph.compile()

# LangGraph agent run function
def run_langgraph_agent(query: str):
    """Run the compiled graph for ``query`` and return the ``final`` answer."""
    initial_state = {"input": query, "steps": [], "final": None}
    return runnable.invoke(initial_state)["final"]

# ======== 5. CLI Interaction ========
if __name__ == "__main__":
    # Interactive question loop. Ends on "exit"/"quit" (case-insensitive,
    # surrounding whitespace ignored) or when the input stream is closed or
    # interrupted.
    print("LangGraph RAG Agent started. Type 'exit' to quit.\n")
    while True:
        try:
            query = input("Enter your question:\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin closed or the prompt was interrupted: stop without a traceback.
            break
        if query.lower() in ("exit", "quit"):
            break
        if not query:
            # Nothing typed; do not spend a full agent/LLM run on an empty question.
            continue
        result = run_langgraph_agent(query)
        print(f"\nAnswer: {result}\n")


In [None]:
import os
import torch
from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig, AutoModelForCausalLM

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.agents import Tool, initialize_agent, AgentType

from langgraph.graph import StateGraph, END

from typing import TypedDict, List, Union

# ======== 1. Load embeddings model and vector store ========
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# NOTE(security): allow_dangerous_deserialization=True opts in to loading
# pickled data from the index directory. Only point this at an index that was
# built locally or comes from a source you trust.
vectorstore = FAISS.load_local("qa_index_cleaned", embedding_model, allow_dangerous_deserialization=True)
# k=6: number of documents the retriever returns per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 6})

# ======== 2. Load local Qwen2-1.5B quantized model (4bit) ========
model_name = "unsloth/qwen2-1.5b-bnb-4bit"
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto"
)

# Greedy decoding (do_sample=False). `temperature` is intentionally NOT passed:
# it is a sampling-only parameter, so with do_sample=False it has no effect on
# the output and only makes transformers complain about a contradictory
# generation config.
generate_text = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_new_tokens=256,
    do_sample=False,
    repetition_penalty=1.1
)

# Expose the transformers pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=generate_text)

# ======== 3. Build RAG QA tool ========
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=False
)

# The agent sees the QA chain as a single tool that maps a question string to
# an answer string.
rag_tool = Tool(
    name="rag_qa_tool",
    func=qa_chain.run,
    description="Use this tool only to answer questions based on the internal knowledge base. It does NOT have any functions like 'learn' or others."
)

# The agent gets exactly one tool: the RAG question-answering chain.
tools = [rag_tool]

# ======== 4. Initialize LangChain Agent ========
# Zero-shot ReAct agent on the local LLM; call_agent invokes it via .run().
# max_iterations=5 caps the number of reasoning/tool steps per question.
agent_chain = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
    max_iterations=5
)

# ======== 5. Define Agent state structure ========
class AgentState(TypedDict):
    """State dictionary handed to and returned from the graph's "agent" node."""
    # The user's question; set by run_langgraph_agent and copied through by call_agent.
    input: str
    # Starts as [] and is passed through unchanged; nothing in this cell appends to it.
    steps: List[tuple]
    # None in the initial state; call_agent stores the answer (or an error message) here.
    final: Union[str, None]

# ======== 6. Agent call function ========
def call_agent(state: AgentState) -> AgentState:
    """Graph node: answer ``state["input"]`` with the LangChain agent.

    Any exception raised by the agent is caught and reported as text in
    ``final`` rather than propagated, so ``final`` is always a string.
    """
    question = state["input"]
    next_state = {"input": question, "steps": state.get("steps", [])}
    try:
        next_state["final"] = agent_chain.run(question)
    except Exception as err:
        next_state["final"] = f"Agent call failed, error: {err}"
    return next_state

# ======== 7. Control flow function ========
def should_continue(state: AgentState) -> str:
    """Router after "agent": loop back while ``final`` is None, otherwise END."""
    if state["final"] is None:
        return "agent"
    return END

# ======== 8. Build LangGraph graph ========
# Single-node graph: entry -> "agent" -> should_continue -> END.
graph = StateGraph(AgentState)
graph.add_node("agent", call_agent)
graph.set_entry_point("agent")
# NOTE(review): should_continue can also return "agent", but the path map below
# only lists END. call_agent always sets "final", so that branch is not expected
# to be taken; if it ever is, the lookup presumably fails — confirm against the
# LangGraph add_conditional_edges docs before relying on the retry branch.
graph.add_conditional_edges("agent", should_continue, {END: END})

# compile() is called without arguments (no checkpointer configured).
runnable = graph.compile()

# ======== 9. Run function ========
def run_langgraph_agent(query: str):
    """Run the compiled graph for ``query`` and return the ``final`` answer."""
    initial_state = {"input": query, "steps": [], "final": None}
    return runnable.invoke(initial_state)["final"]

# ======== 10. CLI interaction ========
if __name__ == "__main__":
    # Interactive question loop. Ends on "exit"/"quit" (case-insensitive,
    # surrounding whitespace ignored) or when the input stream is closed or
    # interrupted.
    print("LangGraph RAG Agent started. Type 'exit' to quit.\n")
    while True:
        try:
            query = input("Enter your question:\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin closed or the prompt was interrupted: stop without a traceback.
            break
        if query.lower() in ("exit", "quit"):
            break
        if not query:
            # Nothing typed; do not spend a full agent/LLM run on an empty question.
            continue
        answer = run_langgraph_agent(query)
        print(f"\nAnswer: {answer}\n")


In [None]:
import os
import time
import torch
from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig, AutoModelForCausalLM

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.agents import Tool, initialize_agent, AgentType

from langgraph.graph import StateGraph, END

from typing import TypedDict, List, Union

# ======== 1. Load embeddings model and vector database ========
print("[INFO] Loading embeddings model and vector database...")
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# NOTE(security): allow_dangerous_deserialization=True opts in to loading
# pickled data from the index directory. Only point this at an index that was
# built locally or comes from a source you trust.
vectorstore = FAISS.load_local("qa_index_cleaned", embedding_model, allow_dangerous_deserialization=True)
# k=6: number of documents the retriever returns per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 6})
print("[DONE] Vector database loaded")

# ======== 2. Load local Qwen2-1.5B quantized model (4bit) ========
print("[INFO] Loading Qwen2-1.5B model (4bit)...")
model_name = "unsloth/qwen2-1.5b-bnb-4bit"
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto"
)

# Greedy decoding (do_sample=False). `temperature` is intentionally NOT passed:
# it is a sampling-only parameter, so with do_sample=False it has no effect on
# the output and only makes transformers complain about a contradictory
# generation config.
generate_text = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,
    max_new_tokens=256,
    do_sample=False,
    repetition_penalty=1.1
)

# Expose the transformers pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=generate_text)
print("[DONE] Model loaded and ready for text generation")

# ======== 3. Build RAG QA tool ========
print("[INFO] Building RAG QA chain...")
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=False
)

# The agent sees the QA chain as a single tool that maps a question string to
# an answer string.
rag_tool = Tool(
    name="rag_qa_tool",
    func=qa_chain.run,
    description="Use this tool only to answer questions based on the internal knowledge base. It does NOT have any functions like 'learn' or others."
)

# The agent gets exactly one tool: the RAG question-answering chain.
tools = [rag_tool]

# ======== 4. Initialize LangChain Agent ========
print("[INFO] Initializing LangChain Agent...")
# Zero-shot ReAct agent on the local LLM; call_agent invokes it via .run().
# max_iterations is kept low (3) so a confused model cannot loop for long.
# NOTE(review): early_stopping_method="force" presumably makes the executor
# stop with a fixed message once the limit is hit — confirm against the
# LangChain AgentExecutor docs.
agent_chain = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True,
    max_iterations=3,
    early_stopping_method="force"
)

# ======== 5. Define Agent state structure ========
class AgentState(TypedDict):
    """State dictionary handed to and returned from the graph's "agent" node."""
    # The user's question; set by run_langgraph_agent and copied through by call_agent.
    input: str
    # Starts as [] and is passed through unchanged; nothing in this cell appends to it.
    steps: List[tuple]
    # None in the initial state; call_agent stores the answer (or an error message) here.
    final: Union[str, None]

# ======== 6. Agent call function ========
def call_agent(state: AgentState) -> AgentState:
    """Graph node: answer ``state["input"]`` with the LangChain agent, with progress logs.

    Before the agent runs, a retrieval is performed purely to log how many
    documents the retriever returns for the question. Any exception (from that
    retrieval or from the agent) is caught, logged, and reported as text in
    ``final`` instead of being raised.

    Returns:
        A new state dict with the same ``input``, the incoming ``steps``
        (empty list when absent) and the answer or error message in ``final``.
    """
    input_text = state["input"]
    print(f"\n[INFO] Calling agent_chain with question: {input_text}")

    try:
        # perf_counter() is monotonic, so the reported duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()

        # Diagnostic only: the retrieved documents are counted, not used.
        print("[INFO] Attempting document retrieval...")
        docs = retriever.get_relevant_documents(input_text)
        print(f"[INFO] Retrieved {len(docs)} documents")

        # Agent execution
        print("[INFO] Calling LLM + Tool reasoning...")
        output = agent_chain.run(input_text)

        elapsed = time.perf_counter() - start_time
        print(f"[DONE] agent_chain finished in {elapsed:.2f} seconds")

    except Exception as e:
        # Best-effort: surface the failure as the answer so the CLI keeps running.
        output = f"Agent call failed, error: {e}"
        print(f"[ERROR] Execution failed: {e}")

    return {
        "input": input_text,
        "steps": state.get("steps", []),
        "final": output
    }

# ======== 7. Control flow function ========
def should_continue(state: AgentState) -> str:
    """Router after "agent": loop back while ``final`` is None, otherwise END."""
    if state["final"] is None:
        return "agent"
    return END

# ======== 8. Build LangGraph graph ========
# Single-node graph: entry -> "agent" -> should_continue -> END.
graph = StateGraph(AgentState)
graph.add_node("agent", call_agent)
graph.set_entry_point("agent")
# NOTE(review): should_continue can also return "agent", but the path map below
# only lists END. call_agent always sets "final", so that branch is not expected
# to be taken; if it ever is, the lookup presumably fails — confirm against the
# LangGraph add_conditional_edges docs before relying on the retry branch.
graph.add_conditional_edges("agent", should_continue, {END: END})

# compile() is called without arguments (no checkpointer configured).
runnable = graph.compile()

# ======== 9. Run function ========
def run_langgraph_agent(query: str):
    """Run the compiled graph for ``query``, logging start and completion.

    Returns the value stored under ``final`` in the resulting state.
    """
    print(f"\n[INFO] Received question: {query}")
    final_state = runnable.invoke({"input": query, "steps": [], "final": None})
    print("[DONE] LangGraph execution completed")
    return final_state["final"]

# ======== 10. CLI interaction ========
if __name__ == "__main__":
    # Interactive question loop. Ends on "exit"/"quit" (case-insensitive,
    # surrounding whitespace ignored) or when the input stream is closed or
    # interrupted.
    print("LangGraph RAG Agent started. Type 'exit' to quit.\n")
    while True:
        try:
            query = input("Enter your question:\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin closed or the prompt was interrupted: stop without a traceback.
            break
        if query.lower() in ("exit", "quit"):
            break
        if not query:
            # Nothing typed; do not spend a full agent/LLM run on an empty question.
            continue
        answer = run_langgraph_agent(query)
        print(f"\nAnswer: {answer}\n")
