# Llama 3.2 Image Extraction and LangGraph

In this example, we'll use the `llama3.2-vision` model to read an image and build an agentic pipeline (using LangGraph) that processes the image and extracts the relevant information.

The image we'll use is an example invoice photo:

![Example Invoice Photo](./files/example_invoice_photo.png)


In [19]:
import base64
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import PydanticToolsParser

from pydantic import BaseModel, Field
from typing import List

# Load the invoice photo as raw bytes, then base64-encode it to text so it
# can be embedded in a message payload.
with open("files/example_invoice_photo.png", "rb") as image_file:
    encoded_bytes = base64.b64encode(image_file.read())
image_data = encoded_bytes.decode("utf-8")


class Invoice(BaseModel):
    """A Pydantic model representing an invoice document."""

    # --- Required fields (declared without a default) ---
    invoice_number: str = Field(
        description="The unique identifier for this invoice",
    )
    date: str = Field(
        description="The date when the invoice was issued",
    )
    total_amount: float = Field(
        description="The total amount to be paid, including tax if applicable",
    )
    vendor_name: str = Field(
        description="The name of the vendor/company issuing the invoice",
    )

    # --- Optional fields (default to None when not provided) ---
    vendor_address: str | None = Field(
        default=None,
        description="The physical address of the vendor",
    )
    # Element type is left unconstrained; the description carries an example
    # of the expected shape.
    line_items: List | None = Field(
        default=None,
        description=(
            'List of items/services being billed, with their individual costs. '
            'For example [{"item_name": "Front and rear brake cables", "cost": 100.0}]'
        ),
    )
    payment_terms: str | None = Field(
        default=None,
        description="The terms of payment, e.g. 'Net 30'",
    )
    due_date: str | None = Field(
        default=None,
        description="The date by which the payment must be made",
    )
    tax_amount: float | None = Field(
        default=None,
        description="The amount of tax charged",
    )
    subtotal: float | None = Field(
        default=None,
        description="The total amount before tax",
    )

# System turn: instruct the model to reply with a structured tool call.
system_message = SystemMessage(
    content="You return the invoice information as a structured tool call"
)

# Human turn: contains only the invoice photo, inlined as a PNG data URL.
invoice_image_part = {
    "type": "image_url",
    "image_url": {"url": f"data:image/png;base64,{image_data}"},
}
human_message = HumanMessage(content=[invoice_image_part])

prompt = ChatPromptTemplate.from_messages([system_message, human_message])

# Bind the Invoice schema as a tool so the model can answer with a tool call.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0).bind_tools([Invoice])

# Pipeline: prompt -> tool-bound model -> parser configured with Invoice.
invoice_parser = PydanticToolsParser(tools=[Invoice])
chain = prompt | llm | invoice_parser

# Invoke with an empty input mapping and pretty-print the first parsed result.
structured_output = chain.invoke({})
first_invoice = structured_output[0]
print(first_invoice.model_dump_json(indent=2))


{
  "invoice_number": "US-001",
  "date": "11/02/2019",
  "total_amount": 154.06,
  "vendor_name": "East Repair Inc.",
  "vendor_address": "1912 Harvest Lane, New York, NY 12210",
  "line_items": [
    {
      "item_name": "Front and rear brake cables",
      "cost": 100.0
    },
    {
      "item_name": "New set of pedal arms",
      "cost": 30.0
    },
    {
      "item_name": "Labor 3hrs",
      "cost": 15.0
    }
  ],
  "payment_terms": "Net 15",
  "due_date": "26/02/2019",
  "tax_amount": 9.06,
  "subtotal": 145.0
}


In [None]:
import base64
from langchain_core.messages import HumanMessage
from langchain_ollama import ChatOllama

# Vision model accessed through the ChatOllama wrapper.
model = ChatOllama(model="llama3.2-vision")

# Re-read the invoice photo and base64-encode it for embedding in the message.
with open("files/example_invoice_photo.png", "rb") as image_file:
    encoded_bytes = base64.b64encode(image_file.read())
image_data = encoded_bytes.decode("utf-8")

# One human message with two parts: a text instruction and the image itself.
instruction_part = {"type": "text", "text": "Describe the contents of this image"}
photo_part = {
    "type": "image_url",
    "image_url": {"url": f"data:image/png;base64,{image_data}"},
}
message = HumanMessage(content=[instruction_part, photo_part])

response = model.invoke([message])
print(response.content)