In [31]:
# imports
import os

from autogen_agentchat.agents import AssistantAgent
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import dotenv
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from pathlib import Path

import httpx
import asyncio
import requests
from PIL import Image
from io import BytesIO
from pydantic import BaseModel, Field
from autogen_agentchat.base import TaskResult
from autogen_core.tools import FunctionTool
from autogen_agentchat.teams import RoundRobinGroupChat, Swarm, SelectorGroupChat
from autogen_core.models import UserMessage
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_ext.models.ollama import OllamaChatCompletionClient
from autogen_agentchat.agents import AssistantAgent, UserProxyAgent, CodeExecutorAgent
from autogen_core import Image as AGImage, CancellationToken  # We will use Image later
from autogen_agentchat.messages import TextMessage, MultiModalMessage, CodeExecutionEvent
from autogen_agentchat.ui import Console
from autogen_agentchat.conditions import MaxMessageTermination, TextMentionTermination, ExternalTermination
from autogen_ext.code_executors.docker import DockerCommandLineCodeExecutor

# Initialize

In [35]:
# Remove proxy settings from the process environment, in both upper- and
# lower-case spellings.
for proxy_var in ('HTTP_PROXY', 'HTTPS_PROXY', 'http_proxy', 'https_proxy'):
    os.environ.pop(proxy_var, None)

# Load variables from .env (override=True) and read the two API keys.
dotenv.load_dotenv(override=True)
gemini_api_key = os.getenv("GEMINI_KEY")
open_router_api_key = os.getenv("OPENROUTER_KEY")

# Or set to empty
os.environ.update({'HTTP_PROXY': '', 'HTTPS_PROXY': ''})

# Embedding client, followed by a quick check that prints the first five
# components of the vector returned for a test string.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=gemini_api_key,
    model="models/gemini-embedding-001",
)
print(embeddings.embed_query("testing!")[:5])

[-0.036367375, -0.019505447, 0.016166167, -0.057357196, -0.008019263]


In [36]:
##################
# Ollama Client. #
##################
ollama_client = OllamaChatCompletionClient(model="llama3.1:latest")

##########################################
# Deepseek free good for simple usecases #
##########################################
deepseek_client = OpenAIChatCompletionClient(
    base_url="https://openrouter.ai/api/v1",
    model="deepseek/deepseek-r1-0528:free",
    api_key=open_router_api_key,
    model_info={
        "family": "deepseek",
        "structured_output": False,
        "vision": True,
        "function_calling": True,
        "json_output": False
    },
    http_client=httpx.AsyncClient(trust_env=False)
)

###########################################
# Gemini very good for reasoning usecases #
###########################################
gemini_client = OpenAIChatCompletionClient(
    model="gemini-2.5-flash",
    api_key=gemini_api_key,
    http_client=httpx.AsyncClient(trust_env=False)
)

########################
# Testing model Client.#
########################
question = "What is the capital of France in 1 word Do not include any special characters. e.g. (Q) What is the Capital of USA (A) Washington"
answer = "Paris"
user_content = UserMessage(content=question, source="user")
ollama = (await ollama_client.create([user_content])).content[:5]
deepseek = (await deepseek_client.create([user_content])).content[:5]
gemini = (await gemini_client.create([user_content])).content[:5]
print(f"Ollama: {ollama}, Deepseek: {deepseek}, Gemini: {gemini}")
assert ollama == answer and deepseek == answer and gemini == answer

Ollama: Paris, Deepseek: Paris, Gemini: Paris


# Ingest the PDF and save the FAISS index to disk

In [30]:
PDF_PATH="../project/A2013-18.pdf"
FAISS_INDEX_DIR = "faiss_index"  # folder the index is written to and read back from
EMBEDDING_MODEL = "models/gemini-embedding-001"

# 0. Verify PDF path
print("Current directory:", os.getcwd())
for file in os.listdir():
    print(f"  {file}")
if not os.path.exists(PDF_PATH):
    raise FileNotFoundError(f"PDF file not found: {PDF_PATH}")

# 1. Load PDF
print("Loading PDF...")
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()
print(f"Loaded {len(documents)} pages")

# 2. Split into chunks
print("Splitting documents...")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
chunks = text_splitter.split_documents(documents)
print(f"Created {len(chunks)} chunks")
if not chunks:
    # Fail early with a clear message instead of an opaque error from the
    # index builder when there is nothing to embed.
    raise ValueError(f"No text chunks were produced from {PDF_PATH}; nothing to index")

# 3. Create embeddings
print("Creating embeddings...")
# Pass the Gemini key explicitly, as the notebook's earlier embeddings client
# does; when GEMINI_KEY is unset, fall back to the library's own key lookup.
embedding_kwargs = {"model": EMBEDDING_MODEL}
embedding_api_key = os.getenv("GEMINI_KEY")
if embedding_api_key:
    embedding_kwargs["google_api_key"] = embedding_api_key
embeddings = GoogleGenerativeAIEmbeddings(**embedding_kwargs)

# 4. Create and save FAISS vector store
print("Building FAISS index...")
vectorstore = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings
)

# Save to disk as files
vectorstore.save_local(FAISS_INDEX_DIR)
print(f"✓ Vector store saved to '{FAISS_INDEX_DIR}' folder")

# Later: Load from disk
print("\nLoading vector store from disk...")
# SECURITY: the saved index includes pickle data, so loading it can execute
# arbitrary code. Only load an index this notebook wrote itself, as here.
loaded_vectorstore = FAISS.load_local(
    FAISS_INDEX_DIR,
    embeddings,
    allow_dangerous_deserialization=True  # Required for pickle files
)

# Test query
query = "What is this document about?"
results = loaded_vectorstore.similarity_search(query, k=3)
print(f"\nQuery: {query}")
if results:
    print(f"Top result:\n{results[0].page_content[:200]}")
else:
    print("No results returned for the test query.")

Current directory: /Users/achuth.iyyatil/Code/personal/stunning-spork/notes
  Autogen Studio.pdf
  Source.zip
  Source
  Autogen Roadmap.excalidraw
  README.md
  Autogen Module 4.pdf
  Autogen .pdf
Loading PDF...
Loaded 370 pages
Splitting documents...
Created 1681 chunks
Creating embeddings...
Building FAISS index...
✓ Vector store saved to 'faiss_index' folder

Loading vector store from disk...

Query: What is this document about?
Top result:
Names, addresses, descriptions 
and occupations of subscribers 
Witnesses (along with names, addresses, 
descriptions and occupations) 
A.B. of………….Merchant Signed before me 
Signature……………. 
C.D. of…


In [55]:
async def fetch_law_points(query: str, k: int = 5) -> str:
    """Retrieve the passages most similar to ``query`` and summarize them.

    Runs a similarity search against the notebook-level ``loaded_vectorstore``,
    joins the text of the matching chunks, and asks a summarizer agent backed
    by ``gemini_client`` for a concise summary focused on the query.

    Args:
        query: Natural-language question, used both to retrieve passages and
            to focus the summary.
        k: Number of chunks requested from the similarity search (default 5).

    Returns:
        The content of the last message produced by the summarizer agent, or a
        fixed notice when the search returns no passages.
    """
    results = loaded_vectorstore.similarity_search(query, k=k)
    if not results:
        # Nothing to summarize: skip the model call instead of sending an
        # empty context.
        return "No relevant passages found for the query."

    combined_text = "\n\n".join(doc.page_content for doc in results)

    agent = AssistantAgent(
        name='summarizer_agent',
        model_client=gemini_client,
        system_message="""
        You are a summarization agent. Summarize the given text concisely.
        """,
    )
    # The question is included in the task so the summary addresses it; the
    # retrieved passages alone do not tell the model what was asked.
    task = (
        "Summarize the context below, focusing on what is relevant to the question.\n"
        f"Question: {query}\n\n"
        f"Context:\n{combined_text}"
    )
    result = await agent.run(task=task)
    return result.messages[-1].content
# Smoke test: run one retrieval + summarization round trip. As the cell's last
# expression, the returned summary string is displayed as the cell output.
await fetch_law_points("What is this document about?")

"The provided text outlines templates and requirements for company formation documents, specifically focusing on subscriber and witness details for a Memorandum of Association.\n\nIt details the necessary information for **subscribers** (names, addresses, descriptions, occupations, signatures, and number of shares taken) and **witnesses** (names, addresses, descriptions, occupations, and signatures).\n\nSpecial provisions are included for **One Person Companies (OPC)**, requiring a declaration from the sole subscriber to form the company and take all shares, and the appointment of a nominee in the event of the sole member's death.\n\nThe text also presents excerpts from the **Memorandum of Association for Unlimited Companies** (both with and without share capital), outlining sections for the company's name, registered office, and objects."

# Retrieve the top-n chunks most relevant to a query from the saved index and summarize them

# Tools section

# Agents