# Image-text-to-text model benchmarks for total-amount extraction from receipts

In [5]:
from transformers import AutoModelForCausalLM, pipeline
from PIL import Image
import pandas as pd

In [6]:
from transformers import AutoModelForVision2Seq

# Moondream 2 vision-language model (Hugging Face repo "vikhyatk/moondream2"),
# pinned to a fixed revision so the remote code that gets executed stays stable.
moondream_model = AutoModelForCausalLM.from_pretrained(
    "vikhyatk/moondream2",
    revision="2025-06-21",
    trust_remote_code=True,
    device_map={"": "mps"}  # place the whole model on the 'mps' device (Apple Silicon)
)

# IBM's Granite Docling model
# NOTE(review): `pipe` is built from the model id string rather than from
# `granite_docling_model`, so the checkpoint is presumably loaded twice — confirm
# whether both objects are actually needed.
granite_docling_model = AutoModelForVision2Seq.from_pretrained("ibm-granite/granite-docling-258M")
pipe = pipeline("image-text-to-text", model="ibm-granite/granite-docling-258M")

Fetching 2 files: 100%|██████████| 2/2 [00:00<00:00, 24966.10it/s]
Device set to use mps:0


In [None]:
import re

# Prompt sent verbatim to every model under evaluation.
query = (
    "What is the total amount of the receipt? "
    "Return only the amount with the currency symbol, no other text."
)

# Row-count cutoff the evaluation loops compare against to stop early.
evaluation_size = 100

def evaluate_moondream_model(model):
    """Score Moondream's extracted receipt totals against the labelled totals.

    Reads ``images/receipt_dataset/labels.csv`` (one ``file_path,total`` pair per
    line), asks ``model`` the module-level ``query`` about each image and prints
    the per-image prediction, the running accuracy and the final accuracy.

    Args:
        model: Object exposing ``query(image, prompt)`` whose result is
            subscriptable with ``"answer"``.

    Notes:
        At most ``evaluation_size`` rows are evaluated. Blank lines in the labels
        file are skipped. Nothing is returned; results are only printed.
    """
    correct_predictions = 0
    evaluated = 0

    # Context manager guarantees the labels file is closed, even if a row fails.
    with open("images/receipt_dataset/labels.csv", "r") as labels_file:
        # Iterate lazily instead of loading every line with readlines().
        for line in labels_file:
            # ">=" so exactly `evaluation_size` rows are scored (">" scored one extra).
            if evaluated >= evaluation_size:
                break

            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing lines

            # Split on the first comma only so a total such as "$1,234.56"
            # stays intact instead of raising on unpacking.
            file_path, label_total = line.split(",", 1)

            # Close the image's underlying file handle once the model has answered.
            with Image.open(f"images/receipt_dataset/{file_path}") as image:
                model_response = model.query(image, query)["answer"]

            # Drop any "$" the model emitted, then re-add exactly one so that
            # "56.58" and "$56.58" both compare equal to the label "$56.58".
            cleaned_model_response = re.sub(r"\$", "", model_response).strip()
            if f"${cleaned_model_response}" == label_total.strip():
                correct_predictions += 1

            evaluated += 1
            print(f"File path: {file_path} Model prediction: {model_response} Label: {label_total}")
            print(f"Current Accuracy: {(correct_predictions / evaluated) * 100}%")
            print("-"*100)

    # Guard against an empty labels file (or evaluation_size <= 0).
    if evaluated == 0:
        print("Final Accuracy: n/a (no samples evaluated)")
        return

    print(f"Final Accuracy: {(correct_predictions / evaluated) * 100}%")

def evaluate_granite_docling_model(model):
    """Score Granite Docling's extracted receipt totals against the labelled totals.

    Reads ``images/receipt_dataset/labels.csv`` (one ``file_path,total`` pair per
    line), sends each image together with the module-level ``query`` through the
    module-level ``pipe`` and prints the per-image prediction, the running
    accuracy and the final accuracy.

    Args:
        model: Kept for interface compatibility. NOTE: it is not used — the
            generation goes through the module-level ``pipe`` pipeline.

    Notes:
        At most ``evaluation_size`` rows are evaluated. Blank lines in the labels
        file are skipped. Nothing is returned; results are only printed.
    """
    correct_predictions = 0
    evaluated = 0

    # Context manager guarantees the labels file is closed, even if a row fails.
    with open("images/receipt_dataset/labels.csv", "r") as labels_file:
        # Iterate lazily instead of loading every line with readlines().
        for line in labels_file:
            # ">=" so exactly `evaluation_size` rows are scored (">" scored one extra).
            if evaluated >= evaluation_size:
                break

            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing lines

            # Split on the first comma only so a total such as "$1,234.56"
            # stays intact instead of raising on unpacking.
            file_path, label_total = line.split(",", 1)

            # Close the image's underlying file handle once generation is done.
            with Image.open(f"images/receipt_dataset/{file_path}") as image:
                messages = [
                    {
                        "role": "user",
                        "content": [
                            {"type": "image", "image": image},
                            {"type": "text", "text": query}
                        ]
                    },
                ]
                result = pipe(text=messages)

            # Index 1 of the generated conversation is read as the model's reply
            # (index 0 being the user turn that was sent in).
            model_response = result[0]["generated_text"][1]["content"]

            # Strip letters AND any "$" the model emitted: the prompt asks for the
            # currency symbol, so without this "$56.58" would become "$$56.58"
            # below and be scored as wrong.
            cleaned_model_response = re.sub(r"[a-zA-Z$]", "", model_response).strip()
            # Remove a sentence-ending period, e.g. "56.58." -> "56.58".
            cleaned_model_response_2 = re.sub(r"\.$", "", cleaned_model_response)
            if f"${cleaned_model_response_2}" == label_total.strip():
                correct_predictions += 1

            evaluated += 1
            print(f"File path: {file_path} Model prediction: {model_response} Label: {label_total}")
            print(f"Current Accuracy: {(correct_predictions / evaluated) * 100}%")
            print("-"*100)

    # Guard against an empty labels file (or evaluation_size <= 0).
    if evaluated == 0:
        print("Final Accuracy: n/a (no samples evaluated)")
        return

    print(f"Final Accuracy: {(correct_predictions / evaluated) * 100}%")

# Run both benchmarks back to back, Moondream first, then Granite Docling.
print("Starting evaluation...")

print("Evaluating Moondream model...")
evaluate_moondream_model(moondream_model)

# Two empty lines separate the two reports.
print("\n")

print("Evaluating Granite Docling model...")
evaluate_granite_docling_model(granite_docling_model)

Starting evaluation...
Evaluating Moondream model...
File path: 1000-receipt.jpg Model prediction: $56.58 Label: $56.58
Current Accuracy: 100.0%
----------------------------------------------------------------------------------------------------
File path: 1001-receipt.jpg Model prediction: 69.25 Label: $69.25
Current Accuracy: 100.0%
----------------------------------------------------------------------------------------------------
File path: 1002-receipt.jpg Model prediction: $7.61 Label: $7.61
Current Accuracy: 100.0%
----------------------------------------------------------------------------------------------------
File path: 1003-receipt.jpg Model prediction: $5.35 Label: $5.35
Current Accuracy: 100.0%
----------------------------------------------------------------------------------------------------
File path: 1004-receipt.jpg Model prediction: $15.03 Label: $15.03
Current Accuracy: 100.0%
----------------------------------------------------------------------------------------