<a href="https://colab.research.google.com/github/uguazelli/telegram_bot/blob/main/rag_sample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install required libraries
!pip install -U -q langchain langchain-openai faiss-cpu openai tiktoken langchain-community python-telegram-bot


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.4/70.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.6/755.6 kB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m61.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m708.7/708.7 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Setup environment
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from telegram import Update
from telegram.ext import ApplicationBuilder, CommandHandler, ContextTypes
from telegram import Update
from telegram.ext import ApplicationBuilder, CommandHandler, ContextTypes

In [3]:

from google.colab import userdata
openai_key = userdata.get('OPENAI_API_KEY')
telegram_key = userdata.get('TELEGRAM_API_KEY')

# Simulate Company Document

In [None]:
doc_text = """
Company: AcmeTech Solutions

Our mission is to simplify logistics for small businesses using technology.

Our main services:
- Route Optimization
- Real-time GPS Tracking
- Automated Delivery Notifications
- Driver Behavior Monitoring

Key Policies:
- Drivers must check in every 2 hours.
- All deliveries must be signed digitally.
- Drivers are required to follow safety procedures.
- Breaks must be logged via the mobile app.

Support is available 8am to 6pm on weekdays.
"""

with open("company_doc.txt", "w") as f:
    f.write(doc_text)


# Start

In [None]:
# Load doc
loader = TextLoader("company_doc.txt")
documents = loader.load()

# Chunk it
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
docs = splitter.split_documents(documents)

In [None]:
# Embeddings + Vectorstore
embeddings = OpenAIEmbeddings(openai_api_key=openai_key)
db = FAISS.from_documents(docs, embeddings)

In [None]:
# Multi-query using GPT-3.5

# Create a retriever that fetches top 4 similar chunks from the vector DB
retriever=db.as_retriever(search_kwargs={"k": 4})

# Use GPT-3.5 to generate 3 variations of the user's question to improve retrieval
llm=ChatOpenAI(openai_api_key=openai_key, temperature=0, model="gpt-3.5-turbo")

multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever,
    llm
)

In [None]:
# Create a compressor using GPT-3.5
# This will summarize each retrieved chunk to keep only the parts relevant to the question
# Helps reduce token usage and improve answer quality
compressor = LLMChainExtractor.from_llm(
    ChatOpenAI(openai_api_key=openai_key, temperature=0, model="gpt-3.5-turbo")
)

In [None]:
# Wrap the multi-query retriever with a contextual compressor
# This means: after retrieving chunks with multi-query, we'll compress them to remove irrelevant info
# - base_retriever: finds relevant chunks using rephrased queries
# - base_compressor: uses an LLM (like GPT-3.5) to extract only the relevant parts from those chunks
# The result is: fewer tokens sent to GPT-4, and higher answer accuracy
final_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,            # LLM-based filter that summarizes each retrieved chunk
    base_retriever=multi_query_retriever   # Retriever that fetches diverse results using reworded queries
)


In [None]:
# Build the RetrievalQA chain (RAG = retrieval + generation)
qa_chain = RetrievalQA.from_chain_type(

    # 🔍 This is the LLM that will generate the final answer (uses GPT-4 for better quality)
    llm=ChatOpenAI(openai_api_key=openai_key, temperature=0, model="gpt-4"),

    # ⚙️ Chain type = "stuff" means it will "stuff" all the retrieved context into a single prompt
    # Other options: "map_reduce", "refine", etc. — but "stuff" is simplest and fastest
    chain_type="stuff",

    # 📚 This is your custom retriever: multi-query + compression
    # It finds relevant chunks and filters them before sending to GPT-4
    retriever=final_retriever,

    # 📎 This returns the source docs used to generate the answer
    # Useful for traceability, UI display, or debugging
    return_source_documents=True
)


In [None]:
query = "What should drivers do during their shift?"
response = qa_chain.invoke(query)

print("Question:", query)
print("Answer:", response["result"])
print("\nRetrieved Sources:")
for doc in response['source_documents']:
    print("-", doc.page_content)


In [5]:
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    await update.message.reply_text("Hello! I am your AI assistant. Ask me anything.")

if __name__ == "__main__":
    app = ApplicationBuilder().token(telegram_key).build()
    app.add_handler(CommandHandler("start", start))
    app.run_polling()

RuntimeError: Cannot close a running event loop