In [42]:
import os
import json
import requests
import argparse
from typing import List

In [43]:
# LangChain & AI Imports
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.tools import tool
from langchain.agents import create_agent
from dotenv import load_dotenv
load_dotenv()

True

In [44]:
# ==========================================
# 1. GLOBAL RESOURCE INITIALIZATION
# ==========================================
# We load the embedding model once at startup to improve performance.
print("[System] Initializing Embedding Model (all-MiniLM-L6-v2)...")
model_name = "sentence-transformers/all-MiniLM-l6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Initialize Vector DB Connection
# We use the same directory for both writing (ingestion) and reading (agent).
DB_DIRECTORY = "./chroma_grants_db"
COLLECTION_NAME = "oursg_grants"

vector_store = Chroma(
    persist_directory=DB_DIRECTORY,
    embedding_function=embeddings,
    collection_name=COLLECTION_NAME
)
print("[System] Vector Database Connected.")

[System] Initializing Embedding Model (all-MiniLM-L6-v2)...
[System] Vector Database Connected.


In [45]:
# ==========================================
# 2. INGESTION LOGIC (ETL)
# ==========================================
def fetch_and_store_grants():
    """Fetches JSON from API, processes it, and stores embeddings in ChromaDB."""
    url = "https://oursggrants.gov.sg/api/v1/grant_metadata/explore_grants"
    print(f"\n[Ingest] Fetching data from {url}...")

    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        print(f"[Error] Failed to fetch data: {e}")
        return

    grants_list = data.get("grant_metadata", [])
    documents = []

    print(f"[Ingest] Processing {len(grants_list)} grants...")

    for grant in grants_list:
        # Create a rich natural language description for the AI to search against
        page_content = (
            f"Grant Name: {grant.get('name', 'Unknown')}\n"
            f"Agency: {grant.get('agency_name', 'Unknown')}\n"
            f"Description: {grant.get('desc', '')}\n"
            f"Target Audience: {', '.join(grant.get('applicable_to', []))}"
        )

        # Sanitize metadata (ChromaDB requires flat types: str, int, float, bool)
        metadata = {
            "id": grant.get("id", ""),
            "name": grant.get("name", "Unknown"),
            "agency_name": grant.get("agency_name", "Unknown"),
            "funding_amount": float(grant.get("grant_amount") or 0.0),
            "updated_at": grant.get("updated_at", "")
        }

        documents.append(Document(page_content=page_content, metadata=metadata))

    if documents:
        print(f"[Ingest] Saving {len(documents)} vectors to disk...")
        # .add_documents will append to the existing DB defined globally
        vector_store.add_documents(documents)
        print("[Ingest] Success! Database updated.")
    else:
        print("[Ingest] No documents found to process.")

fetch_and_store_grants()


[Ingest] Fetching data from https://oursggrants.gov.sg/api/v1/grant_metadata/explore_grants...
[Ingest] Processing 60 grants...
[Ingest] Saving 60 vectors to disk...
[Ingest] Success! Database updated.


In [46]:
@tool
def search_grants_database(query: str) -> str:
    """
    Searches the grants database for relevant schemes based on a natural language query.
    Returns the top 5 matching grants with their IDs, names, and descriptions.
    """
    print(f"\n  >> [Tool Call] Searching DB for: '{query}'")
    results = vector_store.similarity_search(query, k=5)
    
    output = []
    for doc in results:
        output.append(
            f"ID: {doc.metadata['id']}\n"
            f"Name: {doc.metadata['name']}\n"
            f"Agency: {doc.metadata['agency_name']}\n"
            f"Snippet: {doc.page_content[:200]}...\n---"
        )
    return "\n".join(output)

In [47]:
from langchain.messages import HumanMessage
system_prompt = (
        "You are an expert Grants Retrieval Agent for the Singapore Government."
        "Your task is to analyze the user's project requirements (provided in JSON) "
        "and identify the most relevant grants from the database.\n\n"
        "STEPS:\n"
        "1. Analyze the 'issue_area', 'scope_of_grant', and 'KPIs' from the input.\n"
        "2. Use the 'search_grants_database' tool to find matches. Construct a search query "
        "that focuses on the semantic meaning of the project (e.g., 'youth mentorship sports').\n"
        "3. Filter the results. Ignore grants that are clearly for different domains.\n"
        "4. FINAL OUTPUT: Return ONLY a JSON list of the matching Grant IDs. Example: [\"id1\", \"id2\"]."
    )

def create_grant_query_agent():
    """Creates and returns an agent for querying the grants database."""
    agent = create_agent(
        model="gpt-3.5-turbo",
        tools=[search_grants_database],
        system_prompt=system_prompt,
    )
    return agent

In [48]:
main_agent = create_grant_query_agent()

In [53]:
from pprint import pprint
query={
    "issue_area": "environmental sustainability",
    "scope_of_grant": "community projects focused on reducing plastic waste",
    "KPIs": [
        "amount of plastic waste reduced",
        "number of community members engaged in sustainability initiatives"
    ]
}
response = main_agent.invoke(HumanMessage(content=json.dumps(query)))
pprint(response)
grant_list = json.loads(response["messages"][-1].content)
print(grant_list)


  >> [Tool Call] Searching DB for: 'youth mentorship sports'

  >> [Tool Call] Searching DB for: 'community engagement development'

  >> [Tool Call] Searching DB for: 'technology education innovation'
{'messages': [AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 71, 'prompt_tokens': 214, 'total_tokens': 285, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-CxDV4uKTRQpHjRw28hi6eU7KnxfZn', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019bb2ad-a230-73c3-8154-3960f650795c-0', tool_calls=[{'name': 'search_grants_database', 'args': {'query': 'youth mentorship sports'}, 'id': 'call_cusf7GfnPVFsyQ0pIFDKCkiz', 'type': 'tool

In [51]:
import requests

def get_grants_from_ids(target_ids, all_grants_data=None):
    """
    Takes a list of Grant IDs and returns the full grant details.
    
    Args:
        target_ids (list): List of strings, e.g. ["id1", "id2"]
        all_grants_data (list, optional): Pass your existing full list of grants here 
                                          if you have it in memory. If None, it fetches 
                                          fresh data from the API.
    """
    # 1. Get the source data (API or Local)
    if all_grants_data is None:
        print("Fetching fresh data from API for lookup...")
        try:
            url = "https://oursggrants.gov.sg/api/v1/grant_metadata/explore_grants"
            response = requests.get(url)
            response.raise_for_status()
            data = response.json()
            all_grants_data = data.get("grant_metadata", [])
        except Exception as e:
            print(f"Error fetching data: {e}")
            return []

    # 2. Convert target_ids to a set for faster lookup
    target_set = set(target_ids)
    
    # 3. Filter the full list to find matches
    matched_grants = [
        grant for grant in all_grants_data 
        if grant.get("id") in target_set
    ]
    
    return matched_grants

matched_grants = get_grants_from_ids(grant_list)
print("\n[Final Result] Matching Grants:")
pprint(matched_grants)

Fetching fresh data from API for lookup...

[Final Result] Matching Grants:
[{'active': 'true',
  'agency_code': 'aic',
  'agency_icon_url': '/AgencyIcon/aic/AIC-Main Logo RGB.jpg',
  'agency_name': 'Agency for Integrated Care',
  'applicable_to': ['organisation'],
  'available': {'individual': False, 'organisation': True},
  'closing_dates': {'organisation': 'Open for Applications'},
  'deactivated_grant_scheme': None,
  'deactivation_url': 'https://www.aic.sg/partners/digital-transformation-plan',
  'deliverables': [],
  'desc': 'PDG supports the sector’s needs by focusing on 5 strategic areas, '
          'while promoting productivity and digitalisation.',
  'enabled': 'true',
  'explorable_categories': [],
  'grant_amount': None,
  'how_to_apply_html': 'Complete the application, which should take about 30 '
                       'mins if you have the information on hand. All fields '
                       'are necessary unless they are marked as optional.',
  'icon_url': None,
  