In [None]:
import instructor
import vertexai
from vertexai.generative_models import GenerativeModel, Part
from pydantic import BaseModel, Field
import pandas as pd
from pathlib import Path

In [2]:
# Initialise the Vertex AI SDK for the europe-west4 region. No project or
# credentials are passed here; presumably they are resolved from the
# environment — confirm before running on a new machine.
vertexai.init(location="europe-west4")

# Wrap the Gemini model with instructor in its Vertex AI JSON mode. This
# client is called further down with a `response_model`.
client = instructor.from_vertexai(
    mode=instructor.Mode.VERTEXAI_JSON,
    client=GenerativeModel(model_name="gemini-2.0-flash"),
)

In [8]:
# Location of the annual report PDF, given relative to the working directory.
pdf_path = Path("../data/ad_annual-report_2024_interactive.pdf")

# Read the whole file into memory and wrap the raw bytes as a Vertex AI
# `Part` tagged with the PDF MIME type.
report = Part.from_data(mime_type="application/pdf", data=pdf_path.read_bytes())

In [77]:
# Instruction text for the model: a numbered list of 28 audit-style questions
# about the financial statements. It is passed alongside the PDF `report`
# part as the first entry of `messages` in the `client.create` call.
prompt: str = """
Answer the following questions:

1. Could you specify the time frame these financial statements represent? 
2. What accounting standards or principles were applied in the preparation of these statements? 
3. Have there been any alterations to accounting policies or practices since prior reporting periods? 
4. What process is used to determine and reveal major accounting estimates? 
5. What critical assumptions and judgments were utilized in the preparation of these financial statements? 
6. How would you describe the organization's financial standing and performance as reflected in these statements? 
7. Do these financial results include any extraordinary or one-time items? 
8. Are there any transactions with related parties that are documented in the financial statements? 
9. Could you detail the principal elements of operating expenses and their classification? 
10. Are there any asset impairments or write-downs shown in these statements? 
11. What accounting treatment is applied to changes in fair value, particularly for financial instruments? 
12. Do the financial statements disclose any contingent liabilities or ongoing legal issues? 
13. Are there any post-reporting period events that might affect the financial results? 
14. How do these statements depict the organization's liquidity and cash flow situation? 
15. Have there been any notable shifts in working capital ratios relative to previous periods? 
16. Could you describe the organization's debt composition and the presentation of interest expenses? 
17. What accounting and presentation methods are used for taxes in these financial statements? 
18. Are there any contractual commitments or obligations outlined in these statements? 
19. Could you provide a detailed breakdown of equity, including movements in retained earnings? 
20. If relevant, what accounting treatment is applied to foreign currency transactions and translation? 
21. Are there any significant differences or contradictions found within these financial statements? 
22. Could you explain the organization's approach to recognizing revenue and its implementation? 
23. What process is used to determine and disclose provisions for doubtful accounts and bad debts? 
24. Are there any major events or transactions that might affect the ability to compare these statements with others? 
25. How does the organization handle risk management, and how is this communicated? 
26. What accounting treatment is used for gains or losses from asset disposals in these statements? 
27. Are there any non-operating revenues or expenses that influence these financial statements? 
28. Could you describe the nature and extent of the audit or review procedures carried out on these statements? 

... using the attached annual statement PDF.
"""

In [None]:
# A single piece of evidence backing an answer: a page number plus the quoted
# text. Documented with `#` comments on purpose.
# NOTE(review): pydantic copies a class docstring into the generated JSON
# schema, which would change the schema handed to the client — confirm before
# converting these comments into a docstring.
class AnswerSource(BaseModel):
    page: int = Field(
        ...,
        description="The page number in the source document where the answer is found.",
    )
    citation: str = Field(
        ...,
        description="An exact text citation of the answer in the source document.",
    )
    

# One question paired with its extracted answer and the evidence supporting it.
# All three fields are required (none declares a default).
class AuditAnswer(BaseModel):
    question: str
    answer: str = Field(description="The extracted answer to the question.")
    sources: list[AnswerSource] = Field(
        ...,
        description="Pieces of evidence from the source document that support the answer.",
    )

# Top-level container used as the `response_model` of the extraction call:
# a required list with one `AuditAnswer` entry per answered question.
class AuditAnswers(BaseModel):
    answers: list[AuditAnswer] = Field(...)


# Send the question list and the PDF part together in one request. The result
# is requested as (and annotated as) an `AuditAnswers` instance.
audit_answers: AuditAnswers = client.create(
    response_model=AuditAnswers,
    messages=[prompt, report],
)
# Bare expression so the parsed result is shown as the cell output.
audit_answers

AuditAnswers(answers=[AuditAnswer(question='Could you specify the time frame these financial statements represent?', answer='The financial statements represent the financial year ending on December 29, 2024, which consisted of 52 weeks.', sources=[AnswerSource(page=42, citation='Net sales for the financial year ending on December 29, 2024, were €89.4bn, an increase of 0.7% compared to net sales for the year ending on December 31, 2023.'), AnswerSource(page=42, citation='Net sales for the financial year ending on December 29, 2024, were €89,356 million, an increase of €622 million, or 0.7%, compared to net sales of €88,734 million for the financial year ending on December 31, 2023.'), AnswerSource(page=86, citation="Ahold Delhaize's financial year is a 52- or 53-week period ending on the Sunday nearest to December 31 for the Company and our European operations, or the Saturday before the Sunday nearest to December 31 for our operations in the United States. The financial year 2024 consi

In [69]:
# Build one row per answered question. A single recursive `model_dump()` on
# the container yields the same list of plain dicts as dumping each answer
# individually; the nested `sources` stay as a list of dicts in one column.
df = pd.DataFrame(audit_answers.model_dump()["answers"])
df

Unnamed: 0,question,answer,sources
0,Could you specify the time frame these financi...,The financial statements represent the financi...,"[{'page': 42, 'citation': 'Net sales for the f..."
1,What accounting standards or principles were a...,The financial statements have been prepared in...,"[{'page': 86, 'citation': 'These financial sta..."
2,Have there been any alterations to accounting ...,"Net sales, online sales and net sales growth f...","[{'page': 41, 'citation': 'Short-term borrowin..."
3,What process is used to determine and reveal m...,The financial reporting requires us to make ju...,"[{'page': 267, 'citation': 'Estimates and unce..."
4,What critical assumptions and judgments were u...,Estimates and uncertainty include significant ...,"[{'page': 86, 'citation': 'As we have elected ..."
5,How would you describe the organization's fina...,"In 2024, Ahold Delhaize achieved strong underl...","[{'page': 9, 'citation': 'We're thankful that,..."
6,Do these financial results include any extraor...,The (IFRS) operating income for 2024 include a...,"[{'page': 42, 'citation': 'IFRS operating inco..."
7,Are there any transactions with related partie...,Ahold Delhaize has entered into arrangements w...,"[{'page': 316, 'citation': 'Ahold Delhaize has..."
8,Could you detail the principal elements of ope...,The principal elements of operating expenses a...,"[{'page': 35, 'citation': 'Operating expenses ..."
9,Are there any asset impairments or write-downs...,"Yes, net impairment losses of property, plant ...","[{'page': 46, 'citation': 'Ahold Delhaize reco..."


Explode the _sources_ column so that each source gets its own row, then blank out the repeated question/answer text on the additional rows for a more readable table.

In [None]:
# Give every source its own row; the original row label is repeated for each
# source that belonged to the same question.
df_exploded = df.explode("sources")

# True for every row whose (question, answer) pair already appeared above it,
# i.e. the second and later sources of the same answer.
df_duplicates = df_exploded.duplicated(subset=["question", "answer"])

# Blank the repeated text in both columns at once so that only the first row
# of each answer shows the question and answer.
df_exploded.loc[df_duplicates, ["question", "answer"]] = ""
df_exploded

Unnamed: 0,question,answer,sources
0,Could you specify the time frame these financi...,The financial statements represent the financi...,"{'page': 42, 'citation': 'Net sales for the fi..."
0,,,"{'page': 42, 'citation': 'Net sales for the fi..."
0,,,"{'page': 86, 'citation': 'Ahold Delhaize's fin..."
1,What accounting standards or principles were a...,The financial statements have been prepared in...,"{'page': 86, 'citation': 'These financial stat..."
2,Have there been any alterations to accounting ...,"Net sales, online sales and net sales growth f...","{'page': 41, 'citation': 'Short-term borrowing..."
3,What process is used to determine and reveal m...,The financial reporting requires us to make ju...,"{'page': 267, 'citation': 'Estimates and uncer..."
4,What critical assumptions and judgments were u...,Estimates and uncertainty include significant ...,"{'page': 86, 'citation': 'As we have elected t..."
5,How would you describe the organization's fina...,"In 2024, Ahold Delhaize achieved strong underl...","{'page': 9, 'citation': 'We're thankful that, ..."
5,,,"{'page': 9, 'citation': 'We delivered on our p..."
6,Do these financial results include any extraor...,The (IFRS) operating income for 2024 include a...,"{'page': 42, 'citation': 'IFRS operating incom..."
