In [1]:
from mistralai import Mistral
import os
from dotenv import load_dotenv

load_dotenv('../.envrc')



True

In [2]:
import phoenix as px
import llama_index.core

def launch_phoenix():
    px.launch_app()
    llama_index.core.set_global_handler("arize_phoenix")

def close_phoenix():
    px.close_app()


### Ingestion Setup for PDF Files
- Parse PDF to text
- Select embedding model
- Connection to vector store
    - Create table if not exists
- Create index pipeline
- Run the pipeline on the parsed text

In [33]:
# file_name = 'fy2025_budget_statement.pdf'
# file_name = 'budget-debate-round-up-speech.pdf'
# file_name = 'fy2025_budget_booklet_english.pdf'
# file_name = 'fy2025_budget_booklet_chinese.pdf'
# file_name = 'fy2025_budget_booklet_malay.pdf'
file_name = 'fy2025_budget_booklet_tamil.pdf'
file_path = f'../data/{file_name}'

#### Convert PDF to text

Use [Mistral OCR API](https://docs.mistral.ai/capabilities/document/) for PDF parsing because of support for
- Parse PDF into markdown
- Allow for images in pdf (base64)


In [21]:
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
assert MISTRAL_API_KEY is not None
client = Mistral(api_key=MISTRAL_API_KEY)

In [34]:
uploaded_pdf = client.files.upload(
    file={
        "file_name": file_name,
        "content": open(file_path, "rb"),
    },
    purpose="ocr"
)

signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id)
print(f'signed_url: {signed_url}')

# Send to Mistral OCR API
ocr_response = client.ocr.process(
            model="mistral-ocr-latest",
            document={
                "type": "document_url",
                "document_url": signed_url.url,
            },
            include_image_base64=False,
        )
        
markdown = '\n\n'.join([page.markdown for page in ocr_response.pages])

signed_url: url='https://mistralaifilesapiprodswe.blob.core.windows.net/fine-tune/ca9f74b9-2aeb-457f-a3ac-a81ac3401e24/ce5a4f6f5f06405790b1a51df7ad7543.pdf?se=2025-04-16T08%3A35%3A18Z&sp=r&sv=2025-05-05&sr=b&sig=eLu6UF9z/SZOrnql2z9rZ3Pn9%2BSC1VX1IOKgX/bF63M%3D'


In [None]:
# see the markdown
from IPython.display import Markdown, display

display(Markdown(markdown))


Extract from image (if needed)

In [None]:
# Extract from image (not needed)
# ocr_response = client.ocr.process(
#     model="mistral-ocr-latest",
#     document={
#         "type": "image_url",
#         "image_url": "https://www.mof.gov.sg/docs/librariesprovider3/budget2025/images/resources/fy2025_budget_disbursement_calendar_english.png"
#     }
# )


#### Load Embedding model

In [4]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import re

model_name = "intfloat/multilingual-e5-large"

embed_model = HuggingFaceEmbedding(model_name=model_name)

embedding_model_dimensions = 1024


#### Connect to vector store

In [5]:
DB_HOST = os.getenv('DB_HOST')
assert DB_HOST is not None
DB_PORT = os.getenv('DB_PORT')
assert DB_PORT is not None
DB_USER = os.getenv('DB_USER')
assert DB_USER is not None
DB_PASSWORD = os.getenv('DB_PASSWORD')
assert DB_PASSWORD is not None
DB_NAME = os.getenv('DB_NAME')
assert DB_NAME is not None

DB_URL = f'postgresql+asyncpg://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}'


Create database and table (Initial setup)

In [6]:
from llama_index.vector_stores.postgres import PGVectorStore

table_prefix = 'budget_2025-'
# regex to only have words, numbers, and dashes
# replace / with -
model_name_clean = re.sub(r'[^a-zA-Z0-9\-]', '-', model_name)
table_name = f'{table_prefix}{model_name_clean}'
table_name


'budget_2025-intfloat-multilingual-e5-large'

In [7]:
# Change this to True if creating the table for the first time
perform_setup = False
vector_store = PGVectorStore.from_params(
            host=DB_HOST,
            port=DB_PORT,
            database=DB_NAME,
            user=DB_USER,
            password=DB_PASSWORD,
            table_name=table_name,
            perform_setup=perform_setup,
            embed_dim=embedding_model_dimensions,
        )

if perform_setup:
    vector_store._initialize()
    print(f'Vector store initialized for {table_name}')


Vector store initialized for budget_2025-intfloat-multilingual-e5-large


In [12]:
from llama_index.core import VectorStoreIndex

vsi = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model
)

#### Create index pipeline

In [13]:
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import MarkdownNodeParser
from llama_index.core.node_parser import SemanticSplitterNodeParser

semantic_splitter = SemanticSplitterNodeParser(embed_model=embed_model)

pipeline = IngestionPipeline(
    transformations=[
        MarkdownNodeParser(),
        semantic_splitter,
        embed_model
    ],
    vector_store=vector_store
)


In [36]:
from llama_index.core.schema import Document

metadata = {
    'source_document': file_name
}

document = Document(text=markdown, metadata=metadata)
document.excluded_embed_metadata_keys = metadata.keys()

nodes = await pipeline.arun(documents=[document])
print(f'{len(nodes)} created for {file_name}')

45 created for fy2025_budget_booklet_tamil.pdf


#### Test retrieval

In [61]:
similarity_top_k = 30
retriever = vsi.as_retriever(similarity_top_k=similarity_top_k)


In [62]:
vsi._embed_model

HuggingFaceEmbedding(model_name='intfloat/multilingual-e5-large', embed_batch_size=10, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7f10ba05ddc0>, num_workers=None, max_length=512, normalize=True, query_instruction=None, text_instruction=None, cache_folder=None, show_progress_bar=False)

In [63]:
question = "What are some housing subsidies provided in the budget?"

In [64]:
# question = "How much is the government topping up to cultural matching fund?"

nodes = retriever.retrieve(question)
print(nodes[0].score)
print(nodes[0].text)


0.8069594377017614
The measures we have taken in recent years, and are taking in this Budget, will help to mitigate the impact of rising costs.
31. But in the longer term, the best way to adjust to higher prices is to grow the economy and increase productivity, so that all Singaporeans can enjoy higher real incomes and better standards of living. And let me turn to our strategies next in these areas.


### Agent with Tools
Tools:
- Search from knowledge base (QueryEngineTool)
- Search from web (FunctionTool)

##### Query Engine Tool

In [81]:
response_synthesizer._llm

LiteLLM(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7f10ba05ddc0>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x7f118598a340>, completion_to_prompt=<function default_completion_to_prompt at 0x7f1185751260>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model='ollama_chat/llama3.2:3b', temperature=0.1, max_tokens=None, additional_kwargs={}, max_retries=10)

In [33]:
from llama_index.core.tools import QueryEngineTool

query_engine = vsi.as_query_engine(
    llm=response_synthesizer._llm,
    similarity_top_k=similarity_top_k,
    node_postprocessors=[similarity_postprocessor, reranker],
    response_synthesizer=response_synthesizer,
)

search_knowledge_base_tool = QueryEngineTool.from_defaults(
    query_engine,
    name='search_knowledge_base',
    description="Search information from the knowledge base"
)

#### Web Search Tool
Web search just in case the answer is not retrieved from the knowledge base? 

In [35]:
TAVILY_API_KEY = os.getenv('TAVILY_API_KEY')
assert TAVILY_API_KEY is not None


In [None]:
from llama_index.core.tools import FunctionTool
from tavily import AsyncTavilyClient

async def search_web(query: str) -> str:
    """Useful for using the web to answer questions."""
    client = AsyncTavilyClient(api_key="tvly-...")
    return str(await client.search(query))


tool = FunctionTool.from_defaults(
    search_web,
    name='search_web',
    description="Useful for using the web to answer questions"
    # async_fn=aget_weather,  # optional!
)

In [None]:
from tavily import AsyncTavilyClient

client = AsyncTavilyClient(api_key=TAVILY_API_KEY)

In [40]:
query = "how much CDC vouchers can I get?"

include_domains = ['https://www.mof.gov.sg/singaporebudget']

search_res = await client.search(query=query, include_domains=include_domains)

In [43]:
search_res

{'query': 'how much CDC vouchers can I get?',
 'follow_up_questions': None,
 'answer': None,
 'images': [],
 'results': [{'title': 'Budget | Support For Singaporeans',
   'url': 'https://www.mof.gov.sg/singaporebudget/budget-2025-highlights/support-for-singaporeans',
   'content': 'Budget Speech Budget Resources Budget 2025 Highlights About Budget Budget 2025 Highlights CDC Vouchers [New]  $500 SG60 ActiveSG Credit Top-Up [New]   $100 SG60 Vouchers [New] $600 or $800 Child LifeSG Credits or Edusave Account / Post-Secondary Education Account Top-up [New] $500 MediSave [GSTV] $150 to $450 Large Family LifeSG Credits [New] SG Culture Pass [New]   $100 Personal Income Tax Rebate for Year of Assessment (YA) 2025 [New]   Up to $200 CDC Vouchers [New]  $300 Support for You and Your Households Singapore Budget 2025 is part of the Ministry of Finance, Singapore. Singapore Budget 2025 Budget Speech Budget Statement Budget Resources Budget 2025 Highlights Support For You And Your Household Suppor

In [46]:
print(search_res['results'][0]['content'])


Budget Speech Budget Resources Budget 2025 Highlights About Budget Budget 2025 Highlights CDC Vouchers [New]  $500 SG60 ActiveSG Credit Top-Up [New]   $100 SG60 Vouchers [New] $600 or $800 Child LifeSG Credits or Edusave Account / Post-Secondary Education Account Top-up [New] $500 MediSave [GSTV] $150 to $450 Large Family LifeSG Credits [New] SG Culture Pass [New]   $100 Personal Income Tax Rebate for Year of Assessment (YA) 2025 [New]   Up to $200 CDC Vouchers [New]  $300 Support for You and Your Households Singapore Budget 2025 is part of the Ministry of Finance, Singapore. Singapore Budget 2025 Budget Speech Budget Statement Budget Resources Budget 2025 Highlights Support For You And Your Household Support For Vulnerable Families And Persons With Disabilities About Budget Budget Archives


#### Agent

In [59]:
from llama_index.core.agent.workflow import ReActAgent
from llama_index.core.memory import ChatMemoryBuffer

func_calling_llm = LiteLLM("ollama_chat/llama3.2:3b")

memory = ChatMemoryBuffer.from_defaults(token_limit=40000)

tools = [search_knowledge_base_tool]
agent = ReActAgent(
    name="Budget 2025 RAG Agent", 
    description="An agent that tells you information about the budget",
    llm=func_calling_llm,
    tools=tools
)



In [70]:
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.storage.chat_store.postgres import PostgresChatStore

chat_store = PostgresChatStore.from_uri(DB_URL)

thread_id = 'thread_2'

memory = ChatMemoryBuffer.from_defaults(
    chat_store=chat_store,
    chat_store_key=thread_id,
)

In [71]:
chat_history = memory.get_all()
question = 'as an undergraduate student, what benefits can i get?'
handler = agent.run(question, memory=memory, chat_history=chat_history)

In [72]:
from llama_index.core.agent.workflow import AgentStream, AgentOutput

events = []
async for event in handler.stream_events():
    events.append(event)
    if isinstance(event, AgentStream):
        print(event.delta, end="", flush=True)

    elif isinstance(event, AgentOutput):
       print(f'AgentOutput: {event.response}')  # the current full response
    #    print(event.tool_calls)  # the selected tool calls, if any
    #    print(event.raw)  # the raw llm api response

Thought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: search_knowledge_base
Action Input: {"input": {"title": "Benefits for undergraduate students", "type": "string"}}AgentOutput: assistant: Thought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: search_knowledge_base
Action Input: {"input": {"title": "Benefits for undergraduate students", "type": "string"}}
Thought: The current language of the user is still English. It seems that I need to provide a valid string as input for the tool.
Action: search_knowledge_base
Action Input: {'input': 'Benefits for undergraduate students'}AgentOutput: assistant: Thought: The current language of the user is still English. It seems that I need to provide a valid string as input for the tool.
Action: search_knowledge_base
Action Input: {'input': 'Benefits for undergraduate students'}
Thought: The current language of the user is sti

Maybe use chat engine will be better than agent

#### Corrective RAG Workflow 
- (retrieve_context) Question come in, retrieve context and set in state, maybe perform some postprocessing e.g. reranking
- (generate_answer) Answer question from context
- (grade_answer) Grade answer
- (refine_answer) Refine answer if needed
- (provide_answer) Provide answer to user

State to maintain:
- Question
- Context
- Answer
- Grade
- Refined Answer



In [9]:
from llama_index.core.workflow import (
    Event,
    StartEvent,
    StopEvent,
    Workflow,
    step,
)
from llama_index.core.schema import NodeWithScore
from llama_index.core.base.response.schema import RESPONSE_TYPE

In [115]:
from typing import Annotated
from pydantic import BaseModel, Field

# Start Event
class QuestionEvent(StartEvent):
    question: Annotated[str, "Single question"]
    similarity_top_k: Annotated[int, "Number of nodes to retrieve"] = 20


class NodesRetrievedEvent(Event):
    retrieved_nodes: Annotated[list[NodeWithScore], "Retrieved nodes"]

class NodesRerankedEvent(Event):
    reranked_nodes: Annotated[list[NodeWithScore], "Reranked nodes"]

# class AnswerGeneratedFromContextEvent(Event):
class AnswerGeneratedFromContextEvent(StopEvent):
    question: Annotated[str, "Question"]
    context: Annotated[str, "Context"]
    answer: Annotated[str, "Answer generated from context"]

class GraderOutput(BaseModel):
    is_grounded: bool = Field(description="Whether the answer is grounded in the context")
    confidence: float = Field(
        gt=0.0, lt=1.0,
        description="Confidence value between 0.00 and 1.00 of how grounded the answer is obtained from the context.",
    )
    confidence_explanation: str = Field(..., description="Explanation for the confidence score")

class AnswerGradedEvent(Event):
    grader_output: Annotated[GraderOutput, 'Output object from grading the answer with respect to the context']
    question: Annotated[str, "Question"]
    context: Annotated[str, "Context"]
    answer: Annotated[str, "Answer"]
    
# Stop Event
class AnswerGeneratedEvent(StopEvent):
    answer: Annotated[str, "Answer to user's question. If the answer is grounded in the context, then the answer will be the generated answer from context. Otherwise, the answer will be a fallback answer."]


In [116]:
from llama_index.core.prompts import PromptTemplate
DEFAULT_RELEVANCY_GROUNDING_PROMPT_TEMPLATE = PromptTemplate(
    template="""As a grader, your task is to evaluate the grounding of a generated answer in the context provided with respect to the user's question.

    <question-start>:
    \"\"\"
    {question_str}
    \"\"\"
    <question-end>

    <context-start>:
    \"\"\"
    {context_str}
    \"\"\"
    <context-end>

    <answer-start>:
    \"\"\"
    {answer_str}
    \"\"\"
    <answer-end>

    Evaluation Criteria:
    - Consider whether the answer answers the question.
    - Consider whether the answer can be inferred from the context.

    """
)


In [14]:
from llama_index.llms.ollama import Ollama

llm = Ollama(model="llama3.2:3b", request_timeout=60.0)  # 8.2s to answer question


In [123]:
from llama_index.postprocessor.jinaai_rerank import JinaRerank

JINA_API_KEY = os.getenv('JINA_API_KEY')
assert JINA_API_KEY is not None

postprocessor = JinaRerank(
    top_n=20, model="jina-reranker-v1-base-en", api_key=JINA_API_KEY
)

# Testing
# reranked_nodes = postprocessor.postprocess_nodes(nodes, query_str=question)
# len(reranked_nodes)


In [124]:
from llama_index.core.prompts import PromptTemplate

DEFAULT_ANSWER_GENERATION_PROMPT_TEMPLATE = PromptTemplate(
    template="""Your task is to answer the user's question about Singapore government budget statement based on the context provided. Be detailed and objective in your answer.

    Context: \"\"\"
    {context_str}
    \"\"\"

    User Question: \"\"\"
    {query_str}
    \"\"\"

    """
)

In [None]:
from llama_index.llms.openai import OpenAI
from llama_index.core.workflow import Context
from llama_index.core.response_synthesizers import get_response_synthesizer
from llama_index.core.response_synthesizers.type import ResponseMode
from llama_index.core.schema import MetadataMode
from guardrails import Guard
import guardrails as gd
from llama_index.core.llms import ChatMessage
from llama_index.core.llms import ChatResponse
from llama_index.llms.litellm import LiteLLM

# Define the prompt
guard_structured_prompt = """
Query string here.

${gr.xml_prefix_prompt}

${output_schema}

${gr.json_suffix_prompt_v2_wo_none}
"""

class QAWorkflow(Workflow):
    # llm = OpenAI(model="gpt-4o-mini")  # llm to generate answer
    # llm = Ollama(model="llama3.2:3b", request_timeout=60.0)
    llm = LiteLLM("ollama_chat/llama3.2:3b")
    vsi: VectorStoreIndex = VectorStoreIndex.from_vector_store(
        vector_store=vector_store,
        embed_model=embedding_model
    )
    postprocessor = JinaRerank(
        top_n=20, model="jina-reranker-v1-base-en", api_key=JINA_API_KEY
    )
    
    grader_output_guard = gd.Guard.from_pydantic(output_class=GraderOutput, prompt=guard_structured_prompt)

    grader_output_parser = GuardrailsOutputParser(grader_output_guard)
    grader_llm = Ollama(model="llama3.2:3b", request_timeout=60.0, output_parser=grader_output_parser)

    @step
    async def retrieve_nodes(self, ctx: Context, ev: QuestionEvent) -> NodesRetrievedEvent:
        question = ev.question
        # set question in global state
        await ctx.set("question", question)

        retriever = self.vsi.as_retriever(similarity_top_k=ev.similarity_top_k)
        qe = vsi.as_query_engine()
        nodes = await retriever.aretrieve(question)
        
        return NodesRetrievedEvent(retrieved_nodes=nodes)

    @step
    async def rerank_nodes(self, ctx: Context, ev: NodesRetrievedEvent) -> NodesRerankedEvent:
        nodes = ev.retrieved_nodes
        question = await ctx.get("question")

        reranked_nodes = postprocessor.postprocess_nodes(nodes, query_str=question)
        return NodesRerankedEvent(reranked_nodes=reranked_nodes)

    @step
    async def generate_answer_from_context(self, ctx: Context, ev: NodesRerankedEvent) -> AnswerGeneratedFromContextEvent:
        nodes = ev.reranked_nodes
        question = await ctx.get("question")
        print(f'in generate_answer_from_context, question: {question}')

        # set context in global state for reference subsequently
        context_str = "\n\n".join([node.get_content(MetadataMode.LLM) for node in nodes])
        # await ctx.set("context_str", context_str)

        response_synthesizer = get_response_synthesizer(llm=self.llm, response_mode=ResponseMode.COMPACT)
        
        response = await response_synthesizer.asynthesize(query=question, nodes=nodes)
        print(f'in generate_answer_from_context, response: {response}')
        
        return AnswerGeneratedFromContextEvent(answer=response.response, question=question, context=context_str)

    # @step
    # async def grade_answer(self, ctx: Context, ev: AnswerGeneratedFromContextEvent) -> AnswerGradedEvent:
    #     question, answer, context = ev.question, ev.answer, ev.context
    #     print(f'in grade_answer, question: {question}')
    #     print(f'in grade_answer, answer: {answer}')

    #     grading_template = DEFAULT_RELEVANCY_GROUNDING_PROMPT_TEMPLATE.format(question_str=question, context_str=context, answer_str=answer)

    #     grading_template_with_guard = self.grader_output_parser.format(grading_template)

    #     grader_output_response: ChatResponse = await self.grader_llm.achat([ChatMessage(role='user', content=grading_template_with_guard)])

    #     grader_output_response_str = grader_output_response.message.content
    #     grader_output = GraderOutput.model_validate(grader_output_response_str)

    #     return AnswerGradedEvent(grader_output=grader_output, question=question, context=context, answer=answer)
    
    # @step
    # async def generate_answer(self, ctx: Context, ev: AnswerGradedEvent) -> AnswerGeneratedEvent:
    #     # check if the answer is grounded in the context
    #     answer = 'Sorry, I don\'t have enough information to answer that question.'
    #     if ev.grader_output.is_grounded:
    #         answer = ev.answer
        
    #     return AnswerGeneratedEvent(answer=answer)


NameError: name 'JinaRerank' is not defined

In [128]:
llm

LiteLLM(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7f6774a90fb0>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x7f6775afa700>, completion_to_prompt=<function default_completion_to_prompt at 0x7f67758c9620>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model='ollama_chat/llama3.2:3b', temperature=0.1, max_tokens=None, additional_kwargs={}, max_retries=10)

In [129]:
chat_engine = vsi.as_chat_engine(llm=llm, no)

In [130]:
chat_engine.chat("What do i benefit as an undergraduate student?")

AgentChatResponse(response='As an undergraduate student, you may benefit from various initiatives such as lifelong learning programs, skills development opportunities, and support services that cater to your needs. These initiatives can help you develop valuable skills, enhance your employability, and achieve your career goals. Additionally, you may be eligible for financial assistance or scholarships to support your education and personal development.', sources=[ToolOutput(content='There is no mention of undergraduate students in the provided context. The context primarily discusses initiatives related to lifelong learning, skills development, and support for workers, particularly mid-career Singaporeans and lower-wage workers, as well as measures to help households cope with utilities expenses.', tool_name='query_engine_tool', raw_input={'input': 'What are the benefits for an undergraduate student'}, raw_output=Response(response='There is no mention of undergraduate students in the p

In [None]:
question = "What do i benefit as an undergraduate student?"
nodes = vsi.as_retriever(similarity_top_k=20).retrieve(question)

reranked_nodes = postprocessor.postprocess_nodes(nodes, query_str=question)


In [127]:
# streaming litellm
llm = LiteLLM("ollama_chat/llama3.2:3b")

response_synthesizer = get_response_synthesizer(llm=llm, response_mode=ResponseMode.COMPACT)
        



In [None]:
response = await response_synthesizer.asynthesize(query=question, nodes=nodes, )

In [135]:
w = QAWorkflow(timeout=120,verbose=True)
result = await w.run(start_event=QuestionEvent(question="What do i gain if i am an undergraduate student?"))
print(result)


Running step retrieve_nodes
Step retrieve_nodes produced event NodesRetrievedEvent
Running step rerank_nodes
Step rerank_nodes produced event NodesRerankedEvent
Running step generate_answer_from_context
in generate_answer_from_context, question: What do i gain if i am an undergraduate student?
in generate_answer_from_context, response: Based on the new context of changing circumstances, I'll rewrite the answer:

As a Singaporean undergraduate student, navigating the current economic landscape can be challenging. However, with the government's emphasis on technology and innovation, enterprise ecosystem, and infrastructure investments, there are opportunities to enhance your skills and knowledge.

You can develop in-demand expertise through training programs and workshops that drive efficiency and innovation in various sectors, such as those related to technology and sustainability. Additionally, connecting with like-minded individuals and entrepreneurs who share your vision for creating

WorkflowRuntimeError: Error in step 'grade_answer': 1 validation error for GraderOutput
  Input should be a valid dictionary or instance of GraderOutput [type=model_type, input_value='```json\n{\n  "is_ground...in the context"\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/model_type

In [None]:
ans: RESPONSE_TYPE = result.answer
print(ans)

The total budget for the year is not explicitly stated in the provided information. However, it mentions expected surpluses for FY2024 and FY2025, which are projected to be \$6.4 billion and \$6.8 billion, respectively. For a specific total budget figure, additional details would be required.


In [43]:
question = "What is the capital of France?"
context = "The capital of France is Paris."
answer = "The capital of France is Paris."


In [18]:
sllm = llm.as_structured_llm(output_cls=GraderOutput)

In [54]:
from llama_index.core.llms import ChatMessage
grade_prompt = DEFAULT_RELEVANCY_GROUNDING_PROMPT_TEMPLATE.format(context_str=context, answer_str=answer)


In [23]:
res = sllm.chat([ChatMessage(role="user", content=prompt)])

In [24]:
res.message.content

'{"is_grounded":true,"confidence":0.8,"confidence_explanation":"The answer matches the exact wording of the context, which indicates a strong logical connection and use of relevant keywords."}'

#### LiteLLM

In [10]:
from llama_index.llms.litellm import LiteLLM
from llama_index.core.llms import ChatMessage
# openai call
llm = LiteLLM("ollama_chat/llama3.2:3b")

messages = [ChatMessage(role="user", content='hello tell me a joke about singapore')]



In [11]:
chat_response = llm.chat(messages)

In [13]:
print(chat_response)

assistant: Here's one:

Why did the chili crab from Singapore go to therapy?

Because it was feeling a little "steamed"!

(Sorry, I know it's a bit of a seafood pun, but I hope it made you crack a smile!)


In [26]:
resp = await llm.astream_chat(messages)

In [24]:
print(resp)

assistant: Here's one:

What do you call a fake noodle?

(wait for it...)

An impasta!

Hope that made you smile! Do you want to hear another one?


In [28]:
async for r in resp:
    print(r.delta, end='')

Here's one:

Why did the chili crab from Singapore go to therapy?

Because it was feeling a little "steamed"!

(Sorry, I couldn't resist the seafood pun!)

#### Guard

In [13]:
ollama_model_name = 'ollama_chat/llama3.2:3b'

In [14]:
from guardrails import Guard


guard = Guard()

result = guard(
    messages=[{"role":"user", "content":"How many moons does Jupiter have?"}],
    model=ollama_model_name,
)

print(f"{result.validated_output}")



Jupiter has a total of 92 confirmed moons. However, it's worth noting that this number may not be definitive, as there are many smaller, irregular moons that have not been officially confirmed or named.

The four largest and most well-known moons of Jupiter are:

1. Io
2. Europa
3. Ganymede
4. Callisto

These four moons are known as the Galilean moons, as they were discovered by Galileo Galilei in 1610.


In [80]:
class GraderOutput(BaseModel):
    is_grounded: bool = Field(description="Whether the answer is grounded in the context")
    confidence: float = Field(
        gt=0.0, lt=1.0,
        description="Confidence value between 0-1 of how grounded the answer is obtained from the context.",
    )
    confidence_explanation: str = Field(..., description="Explanation for the confidence score")

In [81]:
guard = Guard.for_pydantic(GraderOutput)

In [82]:
question = 'What benefits do i get if i am an undergraduate student?'
context = 'Singapore government provides various benefits to undergraduate students. For example, they can apply for the Singaporean government scholarship to study in Singapore. They can also apply for the Singaporean government loan to study in Singapore. They can also apply for the Singaporean government grant to study in Singapore.'
# answer = 'The capital of France is Paris.'
answer = 'Students can apply for the Singaporean government scholarship, take a loan or apply for a government grant to study in Singapore.'


prompt = DEFAULT_RELEVANCY_GROUNDING_PROMPT_TEMPLATE.format(question_str=question, context_str=context, answer_str=answer)

In [83]:
messages = [{
  "role": "system",
  "content": "You are a helpful assistant."
}, {
  "role": "user",
  "content": prompt
}]

In [84]:
prompt+="""

${gr.complete_json_suffix_v3}
"""
response = guard(
    model=ollama_model_name,
    messages=messages,
    # prompt_params={"chat_history": chat_history},
)



In [87]:
response.validated_output

{'is_grounded': True,
 'confidence': 0.7,
 'confidence_explanation': 'Partially. The context provides information about various government benefits available to undergraduate students, but it does not explicitly mention a scholarship, loan, or grant.'}

In [88]:
response_str = str(response.validated_output)
response_str

"{'is_grounded': True, 'confidence': 0.7, 'confidence_explanation': 'Partially. The context provides information about various government benefits available to undergraduate students, but it does not explicitly mention a scholarship, loan, or grant.'}"

In [91]:
json.loads(data_str)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [38]:
print(response.validated_output)

```
{
  "deliveries": [
    {
      "date": "June 3",
      "pickup": {
        "address": "797 9th Avenue, Manila",
        "time": "10:00am"
      },
      "dropoff": {
        "address": "Courthouse, 61 Center Street C/O frank james",
        "time": "10:30am"
      }
    },
    {
      "date": "June 2",
      "pickup": {
        "address": "21 3rd Street",
        "time": "11:00am",
        "item": "flowers",
        "cost": 14.50
      },
      "dropoff": {
        "address": "75th Ave",
        "time": "5:30pm"
      }
    },
    {
      "date": "June 3",
      "pickup": {
        "address": "331 5th Street",
        "time": "11:00am",
        "item": "bagels",
        "cost": 34.50
      },
      "dropoff": {
        "address": "75th Ave",
        "time": "5:30pm"
      }
    }
  ]
}
```


In [39]:
if isinstance(response.validated_output, str):
    
else:
    Schedule.model_validate(response.validated_output)

True

In [29]:
Schedule.model_validate_strings(response.validated_output)

# response.validated_output

ValidationError: 1 validation error for Schedule
  Input should be an object [type=model_type, input_value='```\n{\n  "deliveries": ...  }\n    }\n  ]\n}\n```', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/model_type

In [None]:
tools = [] # an open ai compatible list of tools

response = guard(
    model=ollama_model_name,
    messages=messages,
    prompt_params={"chat_history": chat_history},
    tools=guard.json_function_calling_tool(tools),
    tool_choice="required",
)

In [67]:
from llama_index.llms.openai import OpenAI

# Create a guard object
guard = gd.Guard.from_pydantic(output_class=GraderOutput)

# Create output parse object
output_parser = GuardrailsOutputParser(guard)

# attach to an llm object
grader_llm = Ollama(model="llama3.2:3b", request_timeout=60.0, output_parser=output_parser)



In [None]:
import openai
openai.completions.create()

In [86]:
guard_res = guard(llm_api=grader_llm.complete, prompt=fmt_qa_tmpl)
guard_res

PromptCallableException: The callable `fn` passed to `Guard(fn, ...)` failed with the following error: `1 validation error for LLMResponse
output
  Input should be a valid string [type=string_type, input_value=CompletionResponse(text='...gprobs=None, delta=None), input_type=CompletionResponse]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type`. Make sure that `fn` can be called as a function that takes in a single prompt string and returns a string.

In [77]:
output_parser.parse(res.text)

In [64]:
res.message.dict()

/tmp/ipykernel_4322/1398668521.py:1: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  res.message.dict()


{'role': <MessageRole.ASSISTANT: 'assistant'>,
 'additional_kwargs': {'tool_calls': []},
 'blocks': [{'block_type': 'text',
   'text': 'To evaluate the grounding of the generated answer in the provided context, let\'s break down the evaluation criteria:\n\n1. **Context Analysis**: The context contains a clear statement about the capital of France. It presents this information in a straightforward manner without any ambiguity or uncertainty.\n\n2. **Answer Content**: The answer also states that "The capital of France is Paris." This directly mirrors the content found within the provided context.\n\n3. **Grounding Evaluation**:\n   - **Contextual Relevance**: Both the context and the answer are highly relevant to each other, as they both refer to the same piece of information (the capital of France).\n   - **Directness**: The answer is not only directly related to the topic but it\'s also a direct repetition or paraphrasing of the statement found in the context. It does not introduce new

In [36]:
from llama_index.core.prompts.default_prompts import (
    DEFAULT_TEXT_QA_PROMPT_TMPL,
)

# take a look at the new QA template!
fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)
print(fmt_qa_tmpl)

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 


Given below is XML that describes the information to extract from this document and the tags to extract it into.


<output>
    <bool name="is_grounded" description="Whether the answer is grounded in the context"/>
    <float name="confidence" description="Confidence value between 0-1 of the correctness of the result."/>
    <string name="confidence_explanation" description="Explanation for the confidence score"/>
</output>



ONLY return a valid JSON object (no other text is necessary). The JSON MUST conform to the XML format, including any types and format requests e.g. requests for lists, objects and specific types. Be correct and concise.




In [38]:
print(prompt)


Query string here.

${gr.xml_prefix_prompt}

${output_schema}

${gr.json_suffix_prompt_v2_wo_none}



In [35]:
grader_llm.chat([ChatMessage(role="user", content=prompt)])

'The capital of France is Paris.'

In [30]:
print(DEFAULT_TEXT_QA_PROMPT_TMPL)

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 


In [28]:
from llama_index.core.prompts.default_prompts import (
    DEFAULT_TEXT_QA_PROMPT_TMPL,
)

# take a look at the new QA template!
fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)
print(fmt_qa_tmpl)

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 


Given below is XML that describes the information to extract from this document and the tags to extract it into.


<output>
    <object name="points" description="Bullet points regarding events in the author's life.">
        <string name="explanation"/>
        <string name="explanation2"/>
        <string name="explanation3"/>
    </object>
</output>



ONLY return a valid JSON object (no other text is necessary). The JSON MUST conform to the XML format, including any types and format requests e.g. requests for lists, objects and specific types. Be correct and concise.




In [31]:
from guardrails import Guard
import os

# Set your openai API key here
# os.environ["OPENAI_API_KEY"] = [YOUR API KEY]

guard = Guard()

res = guard(
    model="gpt-3.5-turbo",
    messages=[{
        "role": "user",
        "content": "How do I make a cake?"
    }]
)

print(res.raw_llm_output)
print(res.validated_output)
print(res.validation_passed)

TypeError: Guard.__call__() missing 1 required positional argument: 'llm_api'

In [26]:
from llama_index.postprocessor.jinaai_rerank import JinaRerank

JINA_API_KEY = os.getenv('JINA_API_KEY')
assert JINA_API_KEY is not None

postprocessor = JinaRerank(
    top_n=20, model="jina-reranker-v1-base-en", api_key=JINA_API_KEY
)

# Testing
# reranked_nodes = postprocessor.postprocess_nodes(nodes, query_str=question)
# len(reranked_nodes)


In [27]:
from llama_index.core.prompts import PromptTemplate

DEFAULT_ANSWER_GENERATION_PROMPT_TEMPLATE = PromptTemplate(
    template="""Your task is to answer the user's question about Singapore government budget statement based on the context provided. Be detailed and objective in your answer.

    Context: \"\"\"
    {context_str}
    \"\"\"

    User Question: \"\"\"
    {query_str}
    \"\"\"

    """
)

In [37]:
from llama_index.llms.ollama import Ollama
question = "What is the capital of France?"
# llm = Ollama(model="tinyllama", request_timeout=60.0)  # slow, took 37.8s to answer question
llm = Ollama(model="llama3.2:3b", request_timeout=60.0)  # 8.2s to answer question


In [38]:
from llama_index.core.llms import ChatMessage
llm.chat([ChatMessage(role="user", content="What is the capital of France?")])

ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={'tool_calls': []}, blocks=[TextBlock(block_type='text', text='The capital of France is Paris.')]), raw={'model': 'llama3.2:3b', 'created_at': '2025-04-12T16:00:33.990030653Z', 'done': True, 'done_reason': 'stop', 'total_duration': 8223440021, 'load_duration': 7548821024, 'prompt_eval_count': 32, 'prompt_eval_duration': 438269147, 'eval_count': 8, 'eval_duration': 233468572, 'message': Message(role='assistant', content='The capital of France is Paris.', images=None, tool_calls=None), 'usage': {'prompt_tokens': 32, 'completion_tokens': 8, 'total_tokens': 40}}, delta=None, logprobs=None, additional_kwargs={})

In [104]:
def add(a: int, b: int) -> int:
    return a + b

def sub(a: int, b: int) -> int:
    return a - b



In [50]:
chat_response

ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, additional_kwargs={}, blocks=[TextBlock(block_type='text', text="Hi! I'm doing well, thanks for asking. It's great to chat with you. How about you? What's new and exciting in your world?")]), raw=ModelResponse(id='chatcmpl-b04b8dfe-39a5-4313-90d5-5f0242bd9661', created=1744386869, model='ollama/llama3:latest', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content="Hi! I'm doing well, thanks for asking. It's great to chat with you. How about you? What's new and exciting in your world?", role='assistant', tool_calls=None, function_call=None, provider_specific_fields=None))], usage=Usage(completion_tokens=33, prompt_tokens=10, total_tokens=43, completion_tokens_details=None, prompt_tokens_details=None)), delta=None, logprobs=None, additional_kwargs={})