### All Required Imports

In [1]:
import os
import warnings
import json
import shutil
from dotenv import load_dotenv
from datetime import datetime
from typing import Dict, List, Optional

# LangChain Core
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Memory & Chains
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory
from langchain.chains import ConversationChain, LLMChain, RetrievalQA

# Retrieval & Vector Stores
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.document_loaders import PyPDFLoader

# Tools & Agents
from langchain.tools import tool
from langchain.agents import initialize_agent, AgentType



# Suppress warnings
warnings.filterwarnings('ignore')

### Environment Setup and API Key Verification

In [2]:
# Load environment variables
load_dotenv()

# Verify API key
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("❌ OPENAI_API_KEY not found in environment variables. Please set it in your .env file.")
else:
    print("✅ OpenAI API key found and loaded")

✅ OpenAI API key found and loaded


## 1. Models - Basic Setup
**Models are the AI brains (like GPT-4) that understand your questions and generate intelligent responses.**

### Understanding Message Types in LangChain

This is how LangChain keeps track of who said what in a conversation — it uses different message types for different roles:

- **HumanMessage** → input from the user (like you typing a query).
- **AIMessage** → responses generated by the AI model.
- **SystemMessage** → system-level instructions (e.g., context, rules, or prompts that guide the AI's behavior).

So in your code:
```python
response = llm.invoke([HumanMessage(content=test_input)])
```

You're passing the model a list of messages, where the first message is from the human (you). The model then replies with an AIMessage.

👉 If you just passed a raw string, it wouldn't know who the message was from, but with HumanMessage, it knows it's your input in a dialogue.

In [3]:
# Initialize OpenAI model
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.1,
    api_key=openai_api_key
)

# Test the model
test_input = "What are the top 3 benefits of online learning platforms?"

print("🔵 INPUT TO GPT:")
print(f"Query: {test_input}")

print("\n🔵 CALLING GPT-4O-MINI...")
response = llm.invoke([HumanMessage(content=test_input)])

print("\n🤖 OUTPUT FROM GPT:")
print(response.content)

🔵 INPUT TO GPT:
Query: What are the top 3 benefits of online learning platforms?

🔵 CALLING GPT-4O-MINI...

🤖 OUTPUT FROM GPT:
Online learning platforms offer numerous advantages, but here are the top three benefits:

1. **Flexibility and Convenience**: Online learning allows students to access courses and materials at their own pace and on their own schedule. This flexibility is particularly beneficial for those balancing work, family, or other commitments, as it enables learners to study when it suits them best.

2. **Wide Range of Courses and Resources**: Online platforms provide access to a vast array of courses across various subjects and disciplines, often from renowned institutions and experts. This diversity allows learners to explore new interests, acquire new skills, and pursue professional development opportunities that may not be available locally.

3. **Cost-Effectiveness**: Many online learning platforms offer affordable or even free courses, reducing the financial burden

## 2. Prompts and Templates
**Prompts are structured instructions with placeholders that tell the model exactly what to do with your data.**

In [6]:
# Block 1: Basic Property Support Template
user_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a professional property agent. Be patient and helpful."),
    ("human", "Analyze this user query: {user_query}"),
    ("human", "Provide urgency level (1-5) and suggested response category.")
])

# Format and use the prompt
user_query = "I don't understand the tenancy agreements and I need to sign tomorrow!"
formatted_prompt = user_prompt.format_messages(user_query=user_query)

print("🔵 INPUT TO GPT:")
print("Template: Property Support Analysis")
print(f"User Query: {user_query}")
print(f"Formatted Messages: {[msg.content for msg in formatted_prompt]}")

print("\n🔵 CALLING GPT-4O-MINI...")
response = llm.invoke(formatted_prompt)

print("\n🤖 OUTPUT FROM GPT:")
print(response.content)

🔵 INPUT TO GPT:
Template: Property Support Analysis
User Query: I don't understand the tenancy agreements and I need to sign tomorrow!
Formatted Messages: ['You are a professional property agent. Be patient and helpful.', "Analyze this user query: I don't understand the tenancy agreements and I need to sign tomorrow!", 'Provide urgency level (1-5) and suggested response category.']

🔵 CALLING GPT-4O-MINI...

🤖 OUTPUT FROM GPT:
Urgency Level: 5 (High urgency, as the user needs to sign the agreement tomorrow)

Suggested Response Category: Clarification and Guidance on Tenancy Agreements


In [7]:
# Block 2: Multi-role Conversation Template
support_template = ChatPromptTemplate.from_messages([
    ("system", "You are a {role} at {platform}. Use {tone} tone."),
    ("human", "Student issue: {issue}"),
    ("human", "Previous context: {context}"),
    ("human", "Generate a response that addresses the issue and follows up appropriately.")
])

# Example usage
response_inputs = {
    "role": "Senior Property Agent",
    "platform": "Real Estage Consultant Platform", 
    "tone": "patient and supportive",
    "issue": "Struggling with understanding tenancy agreements",
    "context": "User is new to property rental processes, but need to sign tenancy soon"
}

formatted_chat = support_template.format_messages(**response_inputs)

print("\n🔵 INPUT TO GPT:")
print("Template: Multi-role Support Response")
print(f"Variables: {response_inputs}")

print("\n🔵 CALLING GPT-4O-MINI...")
response = llm.invoke(formatted_chat)

print("\n🤖 OUTPUT FROM GPT:")
print(response.content[:200] + "...")


🔵 INPUT TO GPT:
Template: Multi-role Support Response
Variables: {'role': 'Senior Property Agent', 'platform': 'Real Estage Consultant Platform', 'tone': 'patient and supportive', 'issue': 'Struggling with understanding tenancy agreements', 'context': 'User is new to property rental processes, but need to sign tenancy soon'}

🔵 CALLING GPT-4O-MINI...

🤖 OUTPUT FROM GPT:
Absolutely, I understand that navigating tenancy agreements can be quite overwhelming, especially if you're new to the property rental process. It's completely normal to have questions and concerns.

...


## 3. Chains - Connecting Components
**Chains link multiple steps together (prompt → model → parser) to create reusable AI workflows.**

### Understanding the Pipe Operator (|) in LangChain

#### What does | mean here?

Yes — in LangChain's new "LangChain Expression Language" (LCEL), the `|` is operator overloading for pipe composition.

It works a lot like a Unix shell pipeline (`cmd1 | cmd2 | cmd3`), but instead of passing raw text, each component in LangChain has a well-defined input/output schema. The pipe (`|`) just wires them together.

So this:
```python
solution_template | llm | StrOutputParser()
```

means:
1. Take the PromptTemplate (solution_template)
2. Format it with the given inputs → produces a prompt string (or messages)
3. Send it to llm (the LLM model) → produces an LLM response
4. Pass the response into StrOutputParser() → returns a clean string

Each `|` passes the output of the left component as input to the right component, creating a seamless data flow pipeline.

#### Chain vs Manual Steps Comparison

**❌ Manual Approach (3 separate steps):**
```python
# Step 1: Format the prompt
formatted_prompt = prompt.format_messages(metric=metric_input)

# Step 2: Call the LLM
raw_response = llm.invoke(formatted_prompt)

# Step 3: Parse the output
result = parser.invoke(raw_response)
```

**✅ Chain Approach (1 simple step):**
```python
# All steps combined into one seamless pipeline
analysis_chain = prompt | llm | parser
result = analysis_chain.invoke({"metric": metric_input})
```

**Benefits of Chains:**
- **Less Code**: 1 line instead of 3 separate operations
- **No Intermediate Variables**: No need to manage `formatted_prompt` or `raw_response`
- **Automatic Data Flow**: Each component's output automatically becomes the next component's input
- **Reusable Pipeline**: Create once, use multiple times with different inputs
- **Error Handling**: Built-in error propagation through the pipeline

In [8]:
# Create tenancy data analysis chain
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a professional property agent."),
    ("human", "Analyze this property metric: {metric}")
])

parser = StrOutputParser()

# Create chain using pipe operator
analysis_chain = prompt | llm | parser

# Use the chain
metric_input = "property is 20 years old with recent renovation"

print("🔵 INPUT TO GPT:")
print("Chain: Property Data Analysis")
print(f"Metric: {metric_input}")

print("\n🔵 CALLING GPT-4O-MINI...")
result = analysis_chain.invoke({"metric": metric_input})

print("\n🤖 OUTPUT FROM GPT:")
print(result)

🔵 INPUT TO GPT:
Chain: Property Data Analysis
Metric: property is 20 years old with recent renovation

🔵 CALLING GPT-4O-MINI...

🤖 OUTPUT FROM GPT:
When analyzing a property that is 20 years old with recent renovations, several key factors should be considered:

1. **Age of the Property**: 
   - A 20-year-old property is typically considered to be in the middle of its lifecycle. It may have some wear and tear, but if it has been well-maintained, it can still be a solid investment.
   - The age can also affect the building codes and standards it was built under, which may differ from current regulations.

2. **Recent Renovations**:
   - Renovations can significantly enhance the property's value and appeal. It's important to assess the quality and extent of the renovations. Were they cosmetic (like new paint and flooring) or structural (like updated plumbing, electrical systems, or roof)?
   - Renovations can also improve energy efficiency, which can lower utility costs and appeal to env

## 4. Memory - Conversation Context
**Memory stores conversation history so the AI remembers what was discussed earlier in the chat.**

In [9]:
# Initialize conversation memory
memory = ConversationBufferMemory()

# Create conversation chain with memory
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=False
)

# Have a conversation
input1 = "Hi, I'm Alice and I'm struggling with understanding my tenancy agreements"

print("🔵 INPUT TO GPT:")
print("Conversation with Memory - Turn 1")
print(f"Student: {input1}")

print("\n🔵 CALLING GPT-4O-MINI...")
response1 = conversation.invoke({"input": input1})["response"]

print("\n🤖 OUTPUT FROM GPT:")
print("AI:", response1)

input2 = "What's my name and what am I busy with?"

print("\n🔵 INPUT TO GPT:")
print("Conversation with Memory - Turn 2")
print(f"Student: {input2}")

print("\n🔵 CALLING GPT-4O-MINI...")
response2 = conversation.invoke({"input": input2})["response"]

print("\n🤖 OUTPUT FROM GPT:")
print("AI:", response2)

print("\n📝 Memory Buffer:")
print(memory.buffer)

🔵 INPUT TO GPT:
Conversation with Memory - Turn 1
Student: Hi, I'm Alice and I'm struggling with understanding my tenancy agreements

🔵 CALLING GPT-4O-MINI...

🤖 OUTPUT FROM GPT:
AI: Hello Alice! It's great to meet you. Tenancy agreements can definitely be a bit tricky to navigate. They usually outline the rights and responsibilities of both the landlord and the tenant, including things like rent payment, maintenance responsibilities, and rules about the property. What specific aspects of your tenancy agreement are you struggling with? Are there particular clauses or terms that are confusing? I'm here to help!

🔵 INPUT TO GPT:
Conversation with Memory - Turn 2
Student: What's my name and what am I busy with?

🔵 CALLING GPT-4O-MINI...

🤖 OUTPUT FROM GPT:
AI: Your name is Alice, and you're currently busy with understanding your tenancy agreements. It sounds like you're trying to get a clearer grasp on the details and terms involved. If you have any specific questions or sections you'd li

## 5. Retrieval - Knowledge Base Integration
**Retrieval searches through documents/databases to find relevant information before answering questions.**

In [11]:
# Sample educational policies for knowledge base
educational_policies = [
    "Course Drop Policy: Students can drop courses within 2 weeks for full refund. Partial refunds available until week 4.",
    "Grade Appeal Process: Students must submit grade appeals within 2 weeks of grade posting. Appeals are reviewed by academic committee.",
    "Academic Probation: Students with GPA below 2.0 for two consecutive semesters are placed on academic probation.",
    "Extension Policy: Course extensions up to 30 days require instructor approval. Extensions over 30 days need academic advisor approval.",
    "Technical Support: All students receive email support within 24 hours. Priority support available for premium students.",
    "Graduation Requirements: Students must complete 120 credits with minimum 2.0 GPA and all core course requirements."
]

# Create embeddings and vector store
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

print("📚 Creating Knowledge Base...")
vectorstore = Chroma.from_texts(
    texts=educational_policies,
    embedding=embeddings
)

# Create retrieval QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True
)

📚 Creating Knowledge Base...


### Understanding RetrievalQA.from_chain_type() Components

```python
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)
```

**🤔 Why `from_chain_type()` instead of direct initialization?**

`RetrievalQA` is a complex chain that needs to coordinate multiple steps:
1. **Retrieve** relevant documents from vector store
2. **Combine** retrieved text with user question  
3. **Generate** answer using LLM

`from_chain_type()` is a **factory method** that automatically builds the right chain configuration for you, instead of manually wiring these components together.

**🗂️ What is `chain_type="stuff"`?**

The "stuff" method determines **how retrieved documents are combined** before sending to the LLM:

| Chain Type | How It Works | Best For | Example |
|------------|--------------|----------|---------|
| **"stuff"** | Concatenates ALL retrieved docs into one prompt | Short docs, simple questions | "Here are 3 policy documents: [doc1][doc2][doc3]. Question: What's the drop policy?" |
| **"map_reduce"** | Summarizes each doc separately, then combines summaries | Long docs, complex analysis | Step 1: Summarize each doc → Step 2: Combine summaries → Answer |
| **"refine"** | Builds answer iteratively, refining with each doc | Sequential reasoning needed | Answer using doc1 → Refine answer with doc2 → Final refine with doc3 |
| **"map_rerank"** | Scores each doc's relevance, uses highest-scoring one | When only 1 best source needed | Score doc1: 0.9, doc2: 0.7, doc3: 0.8 → Use doc1 only |

**"stuff"** is the **most common and fastest** approach for typical Q&A scenarios.

**🔍 What is `vectorstore.as_retriever()`?**

**Vector Store** vs **Retriever** serve different purposes:

- **VectorStore** (`Chroma`) = Database that stores and searches embeddings
  - Methods: `.similarity_search()`, `.add_texts()`, `.persist()`
  - Direct database operations

- **Retriever** = Standardized interface for getting relevant documents  
  - Methods: `.get_relevant_documents()`, `.invoke()`
  - Works with any retrieval system (vector, keyword, hybrid)

**Why convert?**
```python
# ❌ RetrievalQA expects a "retriever" interface, not a "vectorstore" 
RetrievalQA.from_chain_type(retriever=vectorstore)  # TypeError!

# ✅ Convert vectorstore to retriever interface
RetrievalQA.from_chain_type(retriever=vectorstore.as_retriever())  # Works!
```

**Optional retriever configuration:**
```python
retriever = vectorstore.as_retriever(
    search_type="similarity",     # or "mmr" (max marginal relevance) 
    search_kwargs={"k": 3}        # return top 3 documents
)
```

In [12]:
# Test retrieval
question = "What is the course drop policy?"

print(f"\n🔵 INPUT TO RETRIEVAL QA:")
print(f"Question: {question}")
print("Action: Search knowledge base + Generate answer")

print("\n🔵 SEARCHING KNOWLEDGE BASE AND CALLING GPT-4O-MINI...")
result = qa_chain.invoke({"query": question})

print("\n🤖 OUTPUT FROM GPT (with retrieved context):")
print(f"Answer: {result['result']}")
print(f"Sources: {[doc.page_content[:100] + '...' for doc in result['source_documents']]}")


🔵 INPUT TO RETRIEVAL QA:
Question: What is the course drop policy?
Action: Search knowledge base + Generate answer

🔵 SEARCHING KNOWLEDGE BASE AND CALLING GPT-4O-MINI...

🤖 OUTPUT FROM GPT (with retrieved context):
Answer: Students can drop courses within 2 weeks for a full refund. Partial refunds are available until week 4.
Sources: ['Course Drop Policy: Students can drop courses within 2 weeks for full refund. Partial refunds availa...', 'Course Drop Policy: Students can drop courses within 2 weeks for full refund. Partial refunds availa...', 'Course Drop Policy: Students can drop courses within 2 weeks for full refund. Partial refunds availa...', 'Extension Policy: Course extensions up to 30 days require instructor approval. Extensions over 30 da...']


## 6. PDF Processing with Chroma DB - Document Q&A
**PDF Processing extracts text from PDFs and creates searchable vector databases for intelligent Q&A.**

In [10]:
def load_and_process_pdf(pdf_path):
    """Load PDF and process it for Chroma DB"""
    
    print("📚 Loading PDF document...")
    print(f"File: {pdf_path}")
    
    # Load PDF
    loader = PyPDFLoader(pdf_path)
    pages = loader.load()
    
    print(f"✅ Loaded {len(pages)} pages from PDF")
    
    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    
    docs = text_splitter.split_documents(pages)
    print(f"📝 Split into {len(docs)} text chunks")
    
    # Create embeddings
    embeddings_pdf = OpenAIEmbeddings(openai_api_key=openai_api_key)
    
    # Clear any existing chroma database (commented out to prevent errors on rerun)
    db_path = "./pdf_knowledge_base"
    # if os.path.exists(db_path):
    #     shutil.rmtree(db_path)
    #     print("🧹 Cleared existing database")
    
    # Create Chroma vector store
    print("🔍 Creating vector embeddings...")
    vectorstore_pdf = Chroma.from_documents(
        documents=docs,
        embedding=embeddings_pdf,
        persist_directory=db_path
    )
    
    # Persist the database
    vectorstore_pdf.persist()
    print(f"💾 Vector database saved to: {db_path}")
    
    return vectorstore_pdf

def create_pdf_qa_system(vectorstore_pdf):
    """Create Q&A system for PDF documents"""
    
    # Create retrieval QA chain
    qa_chain_pdf = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore_pdf.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3}  # Return top 3 relevant chunks
        ),
        return_source_documents=True
    )
    
    print("✅ PDF Q&A system created successfully")
    return qa_chain_pdf

# Process the PDF (assuming educational_course_handbook.pdf exists)
pdf_file_path = "examples/Track_B_Tenancy_Agreement.pdf"

print("🚀 Starting PDF Processing Pipeline...")
pdf_vectorstore = load_and_process_pdf(pdf_file_path)

# Create Q&A system
pdf_qa_system = create_pdf_qa_system(pdf_vectorstore)

# Test the PDF Q&A system
print(f"\n{'='*60}")
print("TESTING PDF Q&A SYSTEM")
print(f"{'='*60}")

# Educational questions to test
pdf_questions = [
    "What is the address of the premise?",
    "Can I hang a picture on the wall?", 
    "Can I claim my deposit back if I end the tenancy early?"
]

for question in pdf_questions:
    print(f"\n🔵 INPUT TO PDF Q&A SYSTEM:")
    print(f"Question: {question}")
    print("Action: Search PDF content + Generate answer")
    
    print(f"\n🔵 SEARCHING PDF DATABASE AND CALLING GPT-4O-MINI...")
    
    result = pdf_qa_system.invoke({"query": question})
    
    print(f"\n🤖 OUTPUT FROM GPT (with PDF context):")
    print(f"Answer: {result['result']}")
    print(f"📄 Sources: Page {result['source_documents'][0].metadata.get('page', 'Unknown')} of PDF")
    print(f"📝 Source Text Preview: {result['source_documents'][0].page_content[:150]}...")

🚀 Starting PDF Processing Pipeline...
📚 Loading PDF document...
File: examples/Track_B_Tenancy_Agreement.pdf
✅ Loaded 10 pages from PDF
📝 Split into 37 text chunks
🔍 Creating vector embeddings...
💾 Vector database saved to: ./pdf_knowledge_base
✅ PDF Q&A system created successfully

TESTING PDF Q&A SYSTEM

🔵 INPUT TO PDF Q&A SYSTEM:
Question: What is the address of the premise?
Action: Search PDF content + Generate answer

🔵 SEARCHING PDF DATABASE AND CALLING GPT-4O-MINI...

🤖 OUTPUT FROM GPT (with PDF context):
Answer: I don't know.
📄 Sources: Page 7 of PDF
📝 Source Text Preview: address specified herein at the last known place of abode or business. A notice sent by registered letter shall deemed
to be given at the time when it...

🔵 INPUT TO PDF Q&A SYSTEM:
Question: Can I hang a picture on the wall?
Action: Search PDF content + Generate answer

🔵 SEARCHING PDF DATABASE AND CALLING GPT-4O-MINI...

🤖 OUTPUT FROM GPT (with PDF context):
Answer: Yes, you can hang a picture on the wall, 

## 7. Tools - Function Calling
**Tools are custom functions the AI can call to perform specific actions like calculations, lookups, or API calls.**

In [None]:
# Define educational tools
@tool
def get_student_info(student_id: str) -> str:
    """Get student information from learning management system"""
    students = {
        "STU001": {
            "name": "Alice Johnson",
            "email": "alice.johnson@university.edu",
            "major": "Computer Science", 
            "gpa": 3.7,
            "current_courses": ["CS101", "MATH201", "PHYS101"],
            "support_level": "Premium"
        }
    }
    student = students.get(student_id, {"error": "Student not found"})
    return json.dumps(student, indent=2)

@tool
def calculate_grade(points_earned: int, total_points: int) -> str:
    """Calculate percentage grade and letter grade"""
    if total_points == 0:
        return "Error: Total points cannot be zero"
    
    percentage = (points_earned / total_points) * 100
    
    if percentage >= 90:
        letter = "A"
    elif percentage >= 80:
        letter = "B"
    elif percentage >= 70:
        letter = "C"
    elif percentage >= 60:
        letter = "D"
    else:
        letter = "F"
    
    return f"Grade: {percentage:.1f}% ({letter})"

# Test tools directly
print("🔧 Testing Tools:")

print("\n🔵 INPUT TO TOOL:")
print("Tool: Grade Calculator")
print("Input: 85 points out of 100")

grade_result = calculate_grade.invoke({"points_earned": 85, "total_points": 100})
print(f"🤖 TOOL OUTPUT: {grade_result}")

print("\n🔵 INPUT TO TOOL:")
print("Tool: Student Lookup")
print("Input: STU001")

student_result = get_student_info.invoke({"student_id": "STU001"})
print(f"🤖 TOOL OUTPUT:\n{student_result}")

## 8. Agents - Autonomous Decision Making
**Agents autonomously decide which tools to use and in what order to accomplish complex tasks.**

### AgentType Differences

| AgentType | Description | Example |
|-----------|-------------|---------|
| **STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION** | Uses structured JSON-like tool calls (machine-friendly). Chooses tools based only on descriptions, no memory. | User: "Find student STU001 grade." → Agent: outputs structured call `{ "action": "lookup_student", "args": {"id": "STU001"}}`. |
| **ZERO_SHOT_REACT_DESCRIPTION** | Uses free-text reasoning (ReAct) to decide tools. No strict structure. | User: "What's 25×36?" → Agent: "I should use calculator. Action: calculator, Input: 25×36". |
| **CHAT_ZERO_SHOT_REACT_DESCRIPTION** | Same as above but designed for chat models, handles multi-turn back-and-forth. | User: "What's 2023+7?" → Agent: "2030." → User (next turn): "Now divide that by 2." → Agent: uses prior answer (2030) → "1015." |
| **CONVERSATIONAL_REACT_DESCRIPTION** | Adds conversation memory (remembers past queries + tool use). | User: "Book me a flight." → Agent: calls flight tool. → User (later): "Make it business class." → Agent: recalls last tool call (flight booking) → modifies it with "business class." |
| **SELF_ASK_WITH_SEARCH** | Special agent: asks itself clarifying sub-questions, then uses search. | User: "Who is CEO of company that owns Instagram?" → Agent: "Who owns Instagram? Meta." → "Who is CEO of Meta? Zuckerberg." |
| **OPENAI_FUNCTIONS** | Uses OpenAI's native function-calling API (direct JSON calls). Very reliable. | User: "Weather in Paris tomorrow." → Agent: outputs `{"name": "get_weather", "arguments": {"city": "Paris", "date": "tomorrow"}}`. |

In [None]:
# Create tools list
tools = [get_student_info, calculate_grade]

# Initialize agent with tools
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Test agent with complex query
query = "Look up student STU001 and calculate their grade if they got 87 points out of 95 total"

print("🔵 INPUT TO AGENT:")
print("Complex Query:", query)
print("Available Tools:", [tool.name for tool in tools])

print("\n🔵 AGENT REASONING AND GPT-4O-MINI CALLS...")
print("(Agent will make multiple calls to GPT and tools)")

response = agent.run(query)

print("\n🤖 FINAL OUTPUT FROM AGENT:")
print(response)

## 9. Complete Educational Support System
**Combines all components (models, prompts, chains, memory, retrieval) into a complete working system.**

### EducationalSupportBot Architecture

The `EducationalSupportBot` class integrates all LangChain components into a unified system:

#### **Initialization Components:**
- **LLM**: GPT-4o-mini for all AI processing
- **Knowledge Base**: Chroma vector store with educational policies
- **Memory**: ConversationBufferMemory for chat history
- **QA Chain**: RetrievalQA for policy questions
- **Tools**: get_student_info() and calculate_grade() functions
- **Agent**: STRUCTURED_CHAT agent with access to tools

#### **Smart Query Routing Logic:**
The `process_query()` method uses keyword-based intent detection:

1. **Policy Keywords** (`"policy", "drop", "appeal", "probation", "requirement"`)
   → **Route**: Knowledge Base Retrieval QA
   → **Action**: Search educational policies and generate answer

2. **Student/Grade Keywords** (`"student", "lookup", "info", "grade", "calculate"`)
   → **Route**: Agent with Tools
   → **Action**: Autonomous tool selection (lookup student or calculate grade)

3. **General Support** (all other queries)
   → **Route**: Conversation Chain with Memory
   → **Action**: General educational guidance with conversation history

#### **Testing Strategy:**
Tests three distinct query types to demonstrate all routing paths:
- Policy question → Knowledge base retrieval
- Student/grade query → Agent tool usage  
- General support → Conversation memory

In [7]:
class EducationalSupportBot:
    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.1, api_key=openai_api_key)
        self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
        
        # Knowledge base
        self.vectorstore = Chroma.from_texts(
            texts=educational_policies,
            embedding=self.embeddings
        )
        
        # Memory for conversations
        self.memory = ConversationBufferMemory()
        
        # QA chain for policies
        self.qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vectorstore.as_retriever()
        )
        
        # Tools
        self.tools = [get_student_info, calculate_grade]
        
        # Agent
        self.agent = initialize_agent(
            tools=self.tools,
            llm=self.llm,
            agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
            verbose=False
        )
    
    def process_query(self, query: str):
        """Process student query intelligently"""
        
        print(f"🔵 INPUT TO SUPPORT BOT:")
        print(f"Query: {query}")
        
        # Simple intent detection
        query_lower = query.lower()
        
        if any(word in query_lower for word in ["policy", "drop", "appeal", "probation", "requirement"]):
            # Use retrieval for policy questions
            print("🎯 Intent: Policy Question → Using Knowledge Base")
            print("\n🔵 SEARCHING KNOWLEDGE BASE...")
            result = self.qa_chain.run(query)
            
        elif any(word in query_lower for word in ["student", "lookup", "info", "grade", "calculate"]):
            # Use agent for student-related queries
            print("🎯 Intent: Student/Grade Query → Using Agent with Tools")
            print("\n🔵 CALLING AGENT...")
            result = self.agent.run(query)
            
        else:
            # Use memory conversation for general support
            print("🎯 Intent: General Support → Using Conversation Memory")
            print("\n🔵 CALLING GPT WITH MEMORY...")
            conversation = ConversationChain(llm=self.llm, memory=self.memory)
            result = conversation.invoke({"input": query})["response"]
        
        print(f"\n🤖 SUPPORT BOT RESPONSE:")
        print(result)
        return result

# Initialize the complete system
print("🚀 Initializing Complete Educational Support Bot...")
support_bot = EducationalSupportBot()

# Test various query types
test_queries = [
    "What's the course drop policy?",
    "Look up student STU001 and calculate grade for 92 out of 100 points", 
    "I'm feeling overwhelmed with my coursework. Any study tips?"
]

for query in test_queries:
    print(f"\n{'='*60}")
    print(f"TESTING COMPLETE SUPPORT BOT")
    print(f"{'='*60}")
    
    support_bot.process_query(query)

print(f"\n{'='*60}")
print("✅ LangChain Tutorial Complete!")
print("You've learned: Models → Prompts → Chains → Memory → Retrieval → Tools → Agents → Integration")
print(f"{'='*60}")

🚀 Initializing Complete Educational Support Bot...


NameError: name 'get_student_info' is not defined