In [1]:
!pip install dspy-ai openai

Collecting dspy-ai
  Downloading dspy_ai-3.0.1-py3-none-any.whl.metadata (285 bytes)
Collecting dspy>=3.0.1 (from dspy-ai)
  Downloading dspy-3.0.1-py3-none-any.whl.metadata (7.1 kB)
Collecting backoff>=2.2 (from dspy>=3.0.1->dspy-ai)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting joblib~=1.3 (from dspy>=3.0.1->dspy-ai)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting ujson>=5.8.0 (from dspy>=3.0.1->dspy-ai)
  Downloading ujson-5.10.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (9.3 kB)
Collecting optuna>=3.4.0 (from dspy>=3.0.1->dspy-ai)
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting magicattr>=0.1.6 (from dspy>=3.0.1->dspy-ai)
  Downloading magicattr-0.1.6-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting litellm>=1.64.0 (from dspy>=3.0.1->dspy-ai)
  Downloading litellm-1.75.7-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.2/41.2 kB[0m [31m3.7 MB/s[0m eta

In [6]:
import dspy

# Option 1: OpenAI (hosted)
# DSPy 3.x routes every provider through dspy.LM; the legacy dspy.OpenAI client
# and dspy.settings.configure shown in older tutorials should not be used with the
# 3.0.1 release installed above. The API key is read from OPENAI_API_KEY.
# lm = dspy.LM("openai/gpt-4")
# dspy.configure(lm=lm)

# Option 2: Ollama (for local models)
# Requires a local Ollama server that already has the model pulled.
OLLAMA_MODEL = 'ollama/llama3.1'
OLLAMA_API_BASE = 'http://localhost:11434'
lm = dspy.LM(OLLAMA_MODEL, api_base=OLLAMA_API_BASE, api_key='')  # empty key: local server
dspy.configure(lm=lm)

In [11]:
class QA(dspy.Signature):
    """Answer questions with short factoid answers."""
    # NOTE(review): docstring kept verbatim — presumably DSPy sends a signature's
    # docstring to the model as the task instructions; confirm before rewording.

    # Input field: the question, supplied by the caller as `question=...`.
    question = dspy.InputField()
    # Output field: the answer; `desc` gives a length hint for this field.
    answer = dspy.OutputField(desc="often between 1 and 5 words")

# Wrap the QA signature in a chain-of-thought predictor.
cot_predictor = dspy.ChainOfThought(QA)

# Smoke test: ask a single question, then show the answer and the reasoning text.
response = cot_predictor(question="What is the capital of Canada?")
print(f"Answer: {response.answer}")
print(f"Reasoning: {response.reasoning}")  # reasoning produced alongside the answer

Answer: Ottawa
Reasoning: The capital of Canada can be determined by recalling basic geographical knowledge.


In [15]:
import dspy
import json
from typing import List, Dict, Optional

# Define the signature for invoice parsing
class InvoiceParser(dspy.Signature):
    """Extract structured invoice data from raw OCR text and return as JSON."""
    # NOTE(review): docstring kept verbatim — presumably DSPy sends a signature's
    # docstring to the model as the task instructions; confirm before rewording.

    # Input field. InvoiceExtractionProgram.forward fills it with its complete
    # instruction prompt (which embeds the OCR text), not with the bare OCR text.
    ocr_text = dspy.InputField(desc="Raw OCR text from invoice")
    # Output field: the model's reply; this cell later feeds it to json.loads.
    structured_json = dspy.OutputField(desc="Structured invoice data in JSON format")

# Initialize the predictor (we'll use ChainOfThought for better reasoning)
# NOTE(review): nothing else in this cell uses invoice_predictor — the program class
# builds its own ChainOfThought(InvoiceParser). Kept in case another cell relies on it.
invoice_predictor = dspy.ChainOfThought(InvoiceParser)

# Load the OCR result from disk. The encoding is pinned to UTF-8 (the encoding this
# cell also uses when writing its outputs) so the read does not depend on the
# platform's locale encoding.
OCR_JSON_PATH = "../ocr_parser/ocr_output.json"
with open(OCR_JSON_PATH, "r", encoding="utf-8") as f:
    ocr_data = json.load(f)

# Convert OCR lines to raw text string
def extract_raw_text(ocr_data, page_index: Optional[int] = 0):
    """Flatten an OCR result into newline-separated text lines.

    The input is walked as ``pages -> blocks -> lines -> words``; the ``value``
    of every word in a line is joined with single spaces, and the resulting
    lines are joined with newlines. Lines without any words are skipped.

    Args:
        ocr_data: Mapping with a ``'pages'`` list. Each page may carry
            ``'blocks'``, each block ``'lines'``, each line ``'words'``;
            a missing list at any of those levels is treated as empty.
        page_index: Index of the page to read. Defaults to ``0`` (first page
            only, the historical behavior). Pass ``None`` to read every page
            in order, e.g. for multi-page invoices.

    Returns:
        The extracted text, or ``""`` when no words are found.

    Raises:
        KeyError: If ``ocr_data`` has no ``'pages'`` entry, or a word has no
            ``'value'``.
        IndexError: If ``page_index`` is outside the range of available pages.
    """
    pages = ocr_data['pages']
    selected_pages = pages if page_index is None else [pages[page_index]]

    raw_lines = []
    for page in selected_pages:
        # .get keeps the handling of 'blocks' consistent with 'lines' and 'words'.
        for block in page.get('blocks', []):
            for line in block.get('lines', []):
                words = [word['value'] for word in line.get('words', [])]
                if words:
                    raw_lines.append(" ".join(words))
    return "\n".join(raw_lines)

# Flatten the loaded OCR structure into the plain text that is later passed to the
# extraction program in this cell.
ocr_text = extract_raw_text(ocr_data)

# Define the system prompt as part of the DSPy program
class InvoiceExtractionProgram(dspy.Module):
    """DSPy module that wraps OCR text in a fixed instruction prompt and runs it
    through a ChainOfThought predictor built on the InvoiceParser signature."""

    # Prompt template filled with str.format: doubled braces are literal JSON braces
    # and {ocr_text} is the only placeholder. The leading whitespace on each line is
    # part of the prompt text and is kept exactly as written.
    _PROMPT_TEMPLATE = """
        Extract structured invoice data from the following raw OCR text:

        {ocr_text}

        Ensure that:
        - All line items are included
        - Final tax amounts (CGST, SGST, IGST) and grand total are captured
        - Bank details (bank name, account number, IFSC, UPI ID if present) are extracted

        Return only valid JSON with the following structure:
        {{
          "vendor": {{
            "name": ...,
            "address": ...,
            "contact": [...],
            "gstin": ...
          }},
          "invoice": {{
            "number": ...,
            "date": ...
          }},
          "bill_to": {{
            "name": ...,
            "address": ...
          }},
          "ship_to": {{
            "name": ...,
            "address": ...
          }},
          "bank_details": {{
            "bank_name": ...,
            "account_number": ...,
            "ifsc": ...,
            "upi_id": ...
          }},
          "line_items": [
            {{
              "description": ...,
              "hsn": ...,
              "uom": ...,
              "quantity": ...,
              "rate": ...,
              "taxable_value": ...,
              "cgst": {{
                "rate": ...,
                "amount": ...
              }},
              "sgst": {{
                "rate": ...,
                "amount": ...
              }}
            }}
          ],
          "total": {{
            "subtotal": ...,
            "cgst_total": ...,
            "sgst_total": ...,
            "igst_total": ...,
            "grand_total": ...
          }}
        }}
        """

    def __init__(self):
        """Create the chain-of-thought extractor used by forward()."""
        super().__init__()
        self.extractor = dspy.ChainOfThought(InvoiceParser)

    def forward(self, ocr_text):
        """Embed ``ocr_text`` in the instruction prompt and return the prediction.

        The filled-in prompt is passed to the extractor as its ``ocr_text`` input;
        the extractor's result is returned as-is.
        """
        prompt = self._PROMPT_TEMPLATE.format(ocr_text=ocr_text)
        return self.extractor(ocr_text=prompt)

def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if it has one.

    Models often wrap JSON answers in a fence (``` or ```json on the first line and
    ``` on the last), which json.loads rejects. Text that does not start with a
    fence is returned unchanged apart from surrounding whitespace.
    """
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line (with or without a language tag) ...
    _, _, body = stripped.partition("\n")
    # ... and everything from the closing fence onward, when one is present.
    return body.rsplit("```", 1)[0].strip()


# Initialize the program
program = InvoiceExtractionProgram()

# Call the program
response = program(ocr_text)

# Process the response
print("Model response received. Parsing JSON...")
print("Answer:", response.structured_json)
print("Reasoning:", response.reasoning)  # Shows CoT steps

# Try to parse JSON and save to file
try:
    # Parse the reply, tolerating an optional Markdown code fence around the JSON
    structured_data = json.loads(_strip_code_fence(response.structured_json))

    # Save to file
    with open("response.json", "w", encoding="utf-8") as f:
        json.dump(structured_data, f, ensure_ascii=False, indent=2)
    print("JSON successfully saved to response.json")

except json.JSONDecodeError as e:
    print(f"Failed to parse JSON: {e}")
    print("Raw model response:")
    print(response.structured_json)

    # Save the raw (unmodified) response anyway for debugging
    with open("response_raw.txt", "w", encoding="utf-8") as f:
        f.write(response.structured_json)
    print("Saved raw response to response_raw.txt")

Model response received. Parsing JSON...
Answer: {
    "vendor": {
        "name": "RAJ SUPER WHOLESALE BAZAR",
        "address": "45,AMBA PRASAD TIWARI MARG, DAULATGANJ UAIN-MP-456001",
        "contact": ["0734-4060723", "9993736333"],
        "gstin": "23CTOPS449201ZX WRFAMT de"
    },
    "invoice": {
        "number": "CN3- 2254",
        "date": "27-Feb-2019"
    },
    "bill_to": {
        "name": "RAJ DATA PROCESSORS",
        "address": ""
    },
    "ship_to": {
        "name": "RAJ DATA PROCESSORS",
        "address": "Add 45, DAULATGANJ, UJJAIN"
    },
    "bank_details": {
        "bank_name": "BANK OF INDIA",
        "account_number": "9100123456456",
        "ifsc": "BKID00001901",
        "upi_id": ""
    },
    "line_items": [
        {
            "description": "SWADIST SOYA OIL ILTR (POUCH)",
            "hsn": "23- Madhya Pradesh",
            "uom": "",
            "quantity": 10,
            "rate": 78.10,
            "taxable_value": 780.95,
            "cgst":