# Day 2: Simple RAG Chatbot (30 minutes)
## Build a FAQ Chatbot with Hugging Face

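Today we'll build a simple RAG (Retrieval-Augmented Generation) chatbot that answers FAQ questions: it embeds every FAQ question with a Hugging Face sentence-transformer model, finds the entry most similar to the user's question, and replies with that entry's stored answer.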

In [2]:
# Quick setup - install what we need
import importlib
import os
import re
import subprocess
import sys
from pathlib import Path

def install_if_needed(package, import_name=None):
    """Install ``package`` with pip unless its module can already be imported.

    Args:
        package: pip requirement string, e.g. ``"numpy"``, ``"pandas==2.2.0"``
            or ``"scikit-learn>=1.3"``. It is passed to pip unchanged.
        import_name: Module name to probe with an import. When omitted it is
            derived from ``package``: version specifiers/extras are stripped,
            a small table of known exceptions is consulted, and otherwise
            ``-`` is replaced by ``_``.

    Raises:
        subprocess.CalledProcessError: if the pip command exits non-zero.
    """
    # Distribution names whose module name is not just "-" swapped for "_".
    known_import_names = {"scikit-learn": "sklearn"}

    if import_name is None:
        # "pkg[extra]>=1.0; marker" -> "pkg"
        dist_name = re.split(r"[<>=!~\[;\s]", package.strip(), maxsplit=1)[0]
        import_name = known_import_names.get(
            dist_name.lower(), dist_name.replace("-", "_")
        )

    try:
        importlib.import_module(import_name)
    except ImportError:
        print(f"Installing {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        # Make the freshly installed files visible to later import statements.
        importlib.invalidate_caches()

# Make sure every third-party dependency of this notebook is installed
packages = [
    "sentence-transformers",
    "scikit-learn",
    "numpy",
    "pandas",
]
for pkg in packages:
    install_if_needed(pkg)

# Imports used by the rest of the notebook (standard library first)
import warnings

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Suppress all warnings from here on
warnings.filterwarnings(action='ignore')

setup_banner = "Setup complete! Ready to build RAG chatbot"
print(setup_banner)

In [2]:
# Load FAQ data
# The CSV location is resolved instead of relying on one machine's absolute path:
#   1. the FAQ_CSV_PATH environment variable, when set (used exclusively);
#   2. data/simple_faq.csv relative to the working directory or its parent;
#   3. the original workshop path, kept so the authoring machine still works.
faq_csv_override = os.environ.get("FAQ_CSV_PATH")
if faq_csv_override:
    faq_csv_candidates = [Path(faq_csv_override)]
else:
    faq_csv_candidates = [
        Path("data") / "simple_faq.csv",
        Path("..") / "data" / "simple_faq.csv",
        Path(r"C:\Users\vHaze\Desktop\LLMinds\workshop2\data\simple_faq.csv"),
    ]

faq_csv_path = next((p for p in faq_csv_candidates if p.is_file()), None)
if faq_csv_path is None:
    searched = ", ".join(str(p) for p in faq_csv_candidates)
    raise FileNotFoundError(
        f"simple_faq.csv not found (looked in: {searched}). "
        "Set the FAQ_CSV_PATH environment variable to its location."
    )

df = pd.read_csv(faq_csv_path)

# Later cells embed df['question'] and reply with df['answer']; fail early
# with a clear message if either column is absent.
missing_columns = {"question", "answer"} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"{faq_csv_path} is missing required column(s): {sorted(missing_columns)}"
    )
print(f"Loaded {len(df)} FAQ entries")

# Load the Hugging Face sentence-embedding model (small, fast, good quality)
embedding_model_name = 'all-MiniLM-L6-v2'
print("Loading embedding model...")
model = SentenceTransformer(embedding_model_name)
print("Model loaded!")

# Encode every FAQ question once so the chatbot can compare against them later
print("Creating embeddings...")
faq_questions = df['question'].tolist()
question_embeddings = model.encode(faq_questions)
embeddings_shape = question_embeddings.shape
print(f"Created embeddings! Shape: {embeddings_shape}")

In [3]:
# Simple RAG Chatbot Class
class SimpleRAGBot:
    """FAQ bot that replies with the stored answer of the most similar question.

    A user question is encoded with ``model``, compared against the
    pre-computed FAQ question ``embeddings`` by cosine similarity, and the
    ``'answer'`` of the best-scoring row of ``df`` is returned when the score
    reaches ``threshold``; otherwise a fixed fallback message is returned.

    Args:
        model: Object with an ``encode(list_of_str)`` method returning one
            embedding row per input string.
        df: DataFrame with an ``'answer'`` column; row ``i`` must correspond
            to ``embeddings[i]``.
        embeddings: Embeddings of the FAQ questions, one row per row of ``df``.
        threshold: Minimum similarity for a match to be answered at all.
        high_confidence: Similarity above which a match is labelled "High"
            instead of "Medium".

    Raises:
        ValueError: if ``embeddings`` and ``df`` differ in length.
    """

    FALLBACK_RESPONSE = "I don't have information about that. Please contact support!"

    def __init__(self, model, df, embeddings, threshold=0.3, high_confidence=0.7):
        # A length mismatch would make best-match positions point at the wrong
        # (or a non-existent) answer row, so reject it up front.
        if len(embeddings) != len(df):
            raise ValueError(
                f"embeddings has {len(embeddings)} rows but df has {len(df)}; "
                "they must be aligned one-to-one"
            )
        self.model = model
        self.df = df
        self.embeddings = embeddings
        self.threshold = threshold
        self.high_confidence = high_confidence

    def _best_match(self, user_question):
        """Return ``(row position, similarity)`` of the closest FAQ question.

        Returns ``(None, 0.0)`` when there are no FAQ entries to compare with.
        """
        if len(self.embeddings) == 0:
            return None, 0.0
        query_embedding = self.model.encode([user_question])
        similarities = cosine_similarity(query_embedding, self.embeddings)[0]
        best_match_idx = int(np.argmax(similarities))
        return best_match_idx, float(similarities[best_match_idx])

    def chat(self, user_question):
        """Print the exchange for ``user_question`` and return the bot's reply.

        Prints the user line, the reply, a confidence label with the
        similarity score, and a separator line.

        Returns:
            The matched FAQ answer, or ``FALLBACK_RESPONSE`` when nothing
            scores at least ``threshold``.
        """
        print(f"User: {user_question}")

        best_match_idx, best_similarity = self._best_match(user_question)

        # Check if similarity is good enough
        if best_match_idx is None or best_similarity < self.threshold:
            response = self.FALLBACK_RESPONSE
            confidence = "Low"
        else:
            response = self.df.iloc[best_match_idx]['answer']
            confidence = "High" if best_similarity > self.high_confidence else "Medium"

        print(f"Bot: {response}")
        print(f"Confidence: {confidence} ({best_similarity:.3f})")
        print("-" * 50)
        return response

# Instantiate the chatbot from the model, FAQ table and embeddings built above
bot = SimpleRAGBot(
    model=model,
    df=df,
    embeddings=question_embeddings,
    threshold=0.3,
)
print("RAG Chatbot created!")

In [4]:
# Run a few scripted questions through the chatbot.
# Expected outcome for each one is noted next to it.
test_questions = [
    # expected: business hours entry
    "What time do you open?",
    # expected: support entry
    "How do I contact customer service?",
    # expected: payment entry
    "Can I pay with PayPal?",
    # expected: fallback "don't know" reply
    "What's the weather today?",
]

# Header line followed by a 50-character rule
print("Testing RAG Chatbot:", "=" * 50, sep="\n")

for question in test_questions:
    bot.chat(question)

In [1]:
# Try your own questions!
def start_chat(chatbot=None):
    """Run an interactive question/answer loop until the user quits.

    Reads lines with ``input()``, ignores blank ones, and forwards everything
    else to ``chatbot.chat``. Typing ``quit``, ``exit`` or ``stop`` (any case)
    ends the loop, as does end-of-input or an interrupt.

    Args:
        chatbot: Object exposing ``chat(question)``. Defaults to the
            notebook-level ``bot``.
    """
    if chatbot is None:
        # Looked up at call time so the notebook's current ``bot`` is used.
        chatbot = bot

    print("RAG Chatbot is ready! Type 'quit' to stop.")
    print("Try asking about: business hours, support, payment, shipping")

    exit_commands = {'quit', 'exit', 'stop'}

    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the session cleanly
            # instead of surfacing a traceback.
            print("\nThanks for chatting!")
            break

        if user_input.lower() in exit_commands:
            print("Thanks for chatting!")
            break

        if user_input:
            chatbot.chat(user_input)

# Interactive mode is opt-in; uncomment the next line to chat via stdin:
# start_chat()

# Non-interactive check instead: send two fixed questions to the bot
print("Quick test:")
for quick_question in ("What are your hours?", "Hello there!"):
    bot.chat(quick_question)

print("Ready for Day 3: Building a web interface!")