In [3]:
# DSPy signature for the first pipeline stage: one input (`question`) and
# two outputs (`search_terms`, `search_plan`).
# NOTE(review): DSPy is understood to use the class docstring and each
# field's `desc` as prompt text, so rewording them may change model output —
# confirm before editing those strings.
class QueryUnderstanding(dspy.Signature):
    """Expand user question into retrievable terms and a search plan."""
    # Input: the raw question as asked by the user.
    question = dspy.InputField(desc="The user’s natural language question")
    # Output: the query text handed to the retriever's search_* methods.
    search_terms = dspy.OutputField(desc="Keywords or expanded query for retrieval")
    # Output: free-text description of the retrieval strategies to apply.
    search_plan = dspy.OutputField(desc="Description of which retrieval strategies to use")

# DSPy signature for the second pipeline stage: takes the question plus the
# candidate snippets and yields the subset judged most relevant.
# NOTE(review): DSPy is understood to use the class docstring and each
# field's `desc` as prompt text, so rewording them may change model output —
# confirm before editing those strings.
class EvidenceSelection(dspy.Signature):
    """Select the most relevant evidence from retrieved units."""
    # Input: the user's question, repeated so selection is question-aware.
    question = dspy.InputField(desc="The user’s question")
    # Input: candidate snippets, one string per retrieved unit.
    candidates = dspy.InputField(desc="List of candidate text snippet strings")
    # Output: the chosen snippets. No type annotation is declared here, so
    # whether this arrives as a list or as raw text depends on DSPy's
    # parsing — TODO confirm before indexing into it.
    selected = dspy.OutputField(desc="List of the most relevant snippet strings")

# DSPy signature for the final pipeline stage: turns the question and the
# selected evidence into an answer plus citation metadata.
# NOTE(review): DSPy is understood to use the class docstring and each
# field's `desc` as prompt text, so rewording them may change model output —
# confirm before editing those strings.
class AnswerSynthesis(dspy.Signature):
    """Compose final answer with inline citations and JSON anchors."""
    # Input: the user's question.
    question = dspy.InputField(desc="The user’s question")
    # Input: the evidence snippets chosen by the selection stage.
    evidence = dspy.InputField(desc="List of selected evidence snippets")
    # Output: the natural-language answer text.
    answer_text = dspy.OutputField(desc="Generated natural-language answer")
    # Output: citation/anchor metadata. No type annotation is declared, so
    # this is presumably returned as text rather than parsed JSON — TODO
    # confirm before treating it as a structured object.
    citations_json = dspy.OutputField(desc="List of anchors/citation metadata")

In [None]:
# --- Agent / Module ---
class WorkspaceAgent(dspy.Module):
    """Question-answering pipeline over a pluggable retriever.

    ``forward`` runs four steps in order: query understanding, candidate
    retrieval, evidence selection, and answer synthesis. Each LLM-backed
    step is a ``dspy.Predict`` over the matching signature class.
    """

    def __init__(self, retriever):
        """Store the retriever and build one predictor per LLM stage.

        Args:
            retriever: Backend object exposing ``search_text``,
                ``search_tables`` and ``search_images``. Each is called
                with the search terms and must return an iterable of
                mapping-like hits.
        """
        super().__init__()
        self.retriever = retriever  # your DB / index backend
        self.understand = dspy.Predict(QueryUnderstanding)
        self.select = dspy.Predict(EvidenceSelection)
        self.synthesize = dspy.Predict(AnswerSynthesis)

    def forward(self, question: str) -> dict:
        """Answer ``question`` using retrieved evidence.

        Args:
            question: The user's natural-language question.

        Returns:
            A dict with two keys: ``"answer"`` (the synthesized answer
            text) and ``"citations"`` (the citation metadata exactly as
            produced by the synthesis stage).

        Raises:
            KeyError: If a retrieved dict hit lacks ``unit_id``,
                ``content`` or ``page``.
        """
        # 1. Query → retrieval terms
        understanding = self.understand(question=question)
        search_terms = understanding.search_terms
        # NOTE: the signature also produces `search_plan`, but nothing
        # consumes it yet; all three retriever methods are always queried.

        # 2. Fetch candidate units (text, then tables, then images)
        candidates = [
            *self.retriever.search_text(search_terms),
            *self.retriever.search_tables(search_terms),
            *self.retriever.search_images(search_terms),
        ]

        # Prepare candidate strings for the LLM; only `section_path` is
        # optional, the other keys are required on every hit.
        candidate_strs = [
            f"[{c['unit_id']}] {c['content']} (p. {c['page']}, section {c.get('section_path','')})"
            for c in candidates
        ]

        # 3. Evidence selection
        selection = self.select(question=question, candidates=candidate_strs)

        # 4. Answer synthesis
        synthesis = self.synthesize(question=question, evidence=selection.selected)
        return {
            "answer": synthesis.answer_text,
            "citations": synthesis.citations_json,
        }

In [None]:
# --- Retriever stub for testing ---
class MockRetriever:
    """Canned retrieval backend for exercising the agent without a real index.

    Every method ignores its ``terms`` argument and returns fixed data.
    """

    def search_text(self, terms):
        """Return a fresh one-element list holding a fixed warranty hit."""
        warranty_hit = {
            "unit_id": "block_123",
            "doc_id": "manual_v1",
            "page": 47,
            "bbox": [100, 200, 400, 250],
            "section_path": "2.3 Warranty",
            "content": "Exclusions: Damage from misuse, accidents, or modifications...",
        }
        return [warranty_hit]

    def search_tables(self, terms):
        """Return no table hits."""
        return list()

    def search_images(self, terms):
        """Return no image hits."""
        return list()

In [None]:
# Wire the agent to the stub retriever so the pipeline can run without a
# real index behind it.
retriever = MockRetriever()
agent = WorkspaceAgent(retriever)

# Calling the module runs the whole pipeline for one sample question.
resp = agent("What are the warranty exclusions in the manual?")

# Show both parts of the result, answer first.
for label, key in (("Answer:", "answer"), ("Citations:", "citations")):
    print(label, resp[key])