In [2]:
from google import genai
from google.genai import types
from pydantic import BaseModel, Field
import pandas as pd
from pathlib import Path
import requests

In [5]:
# Gemini client routed through Vertex AI and pinned to the europe-west4 location.
client = genai.Client(vertexai=True, location="europe-west4")

In [6]:
# Read the 2024 annual report from disk and wrap its raw bytes as a PDF part
# that can be passed to the model alongside a text prompt.
pdf_path = Path("../data/ad_annual-report_2024_interactive.pdf")
report_2024 = types.Part.from_bytes(data=pdf_path.read_bytes(), mime_type="application/pdf")

In [7]:
# Audit-style questions (numbered 0-9) that are sent to the model together
# with the annual report PDF.
prompt: str = """
Answer the following questions:

0. What accounting standards or principles were applied in the preparation of these statements?
1. Do these financial results include any extraordinary or one-time items?
2. Could you specify the time frame these financial statements represent?
3. Are there any transactions with related parties that are documented in the financial statements?
4. Could you detail the principal elements of operating expenses and their classification?
5. Have there been any alterations to accounting policies or practices since prior reporting periods?
6. What process is used to determine and reveal major accounting estimates?
7. What accounting and presentation methods are used for taxes in these financial statements?
8. Are there any contractual commitments or obligations outlined in these statements?
9. Could you describe the nature and extent of the audit or review procedures carried out on these statements?

... using the attached annual statement PDF.
"""

In [8]:
# One supporting reference for an answer: the quoted text plus where it was found.
class AnswerSource(BaseModel):
    # Quoted passage that backs the answer; the description is attached to the field itself.
    citation: str = Field(description="A text citation of the answer in the source document.")
    # Document the citation was taken from (the recorded outputs show report labels here).
    document: str
    # Page of the citation; whether this is 0- or 1-based is not specified here — TODO confirm.
    page: int
    

# A single question/answer pair together with the citations that support it.
class AuditAnswer(BaseModel):
    # The question being answered, as text.
    question: str
    # The answer text; nullable, so an entry may carry no answer.
    answer: str | None
    # Citations backing the answer (the recorded outputs show this can be an empty list).
    sources: list[AnswerSource]

# Top-level container used as the response schema: the full list of answered questions.
class AuditAnswers(BaseModel):
    # One AuditAnswer per question.
    answers: list[AuditAnswer]

In [11]:
# Ask Gemini the audit questions about the 2024 report. The JSON mime type plus
# response_schema request a reply shaped like AuditAnswers.
response = client.models.generate_content(
    model='gemini-2.0-flash-001',
    contents=[
        prompt,
        report_2024
    ],
    config=types.GenerateContentConfig(
        temperature=0.2,
        response_mime_type='application/json',
        response_schema=AuditAnswers,
    ),
)

# `response.parsed` is optional: it stays None when the reply could not be turned
# into the schema (e.g. truncated or invalid JSON). Stop here with a clear message
# instead of failing in a later cell with an AttributeError on None.
if response.parsed is None:
    raise ValueError(
        "Gemini reply could not be parsed into AuditAnswers; inspect response.text."
    )

audit_answers: AuditAnswers = response.parsed
audit_answers

AuditAnswers(answers=[AuditAnswer(question='What accounting standards or principles were applied in the preparation of these statements?', answer='These financial statements have been prepared in accordance with International Financial Reporting Standards (IFRSs) as adopted by the EU and also comply with the financial reporting requirements included in Part 9 of Book 2 of the Dutch Civil Code.', sources=[AnswerSource(citation='These financial statements have been prepared in accordance with International Financial Reporting Standards (IFRSs) as adopted by the EU and also comply with the financial reporting requirements included in Part 9 of Book 2 of the Dutch Civil Code.', document='annual statement PDF', page=264)]), AuditAnswer(question='Do these financial results include any extraordinary or one-time items?', answer='The financial results are adjusted for impairments of non-current assets, gains and losses on the sale of assets, gains and losses on leases and subleases, restructuri

In [None]:
# One row per answered question; each model is dumped to a plain dict first,
# so the nested sources end up as lists of dicts in the "sources" column.
df = pd.DataFrame([entry.model_dump() for entry in audit_answers.answers])
df

## Going bigger

In [None]:
def online_pdf_to_part(url: str, timeout: float = 60.0) -> types.Part:
    """Download a PDF over HTTP and wrap its bytes as a ``types.Part``.

    Args:
        url: Address of the PDF to download.
        timeout: Seconds to wait for the server to connect/respond before
            giving up, so a stalled download cannot hang the notebook forever.

    Returns:
        A part holding the downloaded bytes with MIME type ``application/pdf``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection problems or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail fast on error responses; otherwise an HTML error page would be
    # passed on to the model labelled as a PDF.
    response.raise_for_status()
    return types.Part.from_bytes(
        data=response.content,
        mime_type="application/pdf",
    )

In [None]:
# Multi-year comparison questions that are sent to the model together with
# the labelled annual reports.
prompt_compare_years: str = """
Answer the following questions:

- What is the company's revenue for each year?
- Have there been any alterations to accounting policies or practices since prior reporting periods?
- How has the company's strategy changed over the years?
- Summarise the most important changes in the company's financial position over the years.
- Summarise the most important strategic changes over the years.
- What were the key performance indicators for Ahold Delhaize in 2020, and how did they compare to the targets set for 2020, as well as the actual results achieved in 2015?
- How did the key audit matters related to goodwill and brand names change from 2015 to 2020?
- How did the number of associates change from 2015 to 2020?
- What was the dividend per common share in 2015?

... using the attached annual statements.
"""

In [40]:
# Send the comparison questions plus every annual report in one request.
# Each PDF is preceded by a text label so the model can tell the documents apart
# and name them in its citations.
response_compare_years = client.models.generate_content(
    model='gemini-2.0-flash-001',
    contents=[
        prompt_compare_years,
        "Ahold 2015 annual report:",
        online_pdf_to_part("https://www.aholddelhaize.com/media/as2deaxh/ahold_ar15_fullreport_interactive.pdf"),
        "Delhaize 2015 annual report:",
        online_pdf_to_part("https://www.aholddelhaize.com/media/4s3h52sq/2015_annualreport_en.pdf"),
        "Ahold Delhaize 2016 annual report:",
        online_pdf_to_part("https://www.aholddelhaize.com/media/s1pjua0t/ahold-delhaize-annual-report-2016.pdf"),
        "Ahold Delhaize 2017 annual report:",
        online_pdf_to_part("https://www.aholddelhaize.com/media/g1ujnffp/180302_aholddelhaize_annualreport_2017.pdf"),
        "Ahold Delhaize 2018 annual report:",
        online_pdf_to_part("https://www.aholddelhaize.com/media/rsjhnqh4/ahold-delhaize-annual-report-2018-interactive.pdf"),
        "Ahold Delhaize 2019 annual report:",
        online_pdf_to_part("https://www.aholddelhaize.com/media/1xbnvzzk/ahold-delhaize-annual-report-2019.pdf"),
        "Ahold Delhaize 2020 annual report:",
        online_pdf_to_part("https://www.aholddelhaize.com/media/emmkj0we/annual_report_2020_full_links-1.pdf"),
        "Ahold Delhaize 2021 annual report:",
        online_pdf_to_part("https://www.aholddelhaize.com/media/1l5iqhvt/full-annual-report-2021.pdf"),
        "Ahold Delhaize 2022 annual report:",
        online_pdf_to_part("https://www.aholddelhaize.com/media/vy4neu1n/ar-2022-ahold-delhaize-interactive-final.pdf"),
        "Ahold Delhaize 2023 annual report:",
        online_pdf_to_part("https://www.aholddelhaize.com/media/clkbibno/ad_ar23_interactive.pdf"),
        "Ahold Delhaize 2024 annual report:",
        report_2024
    ],
    config=types.GenerateContentConfig(
        temperature=0.2,
        response_mime_type='application/json',
        response_schema=AuditAnswers,
    ),
)

# `response_compare_years.parsed` is optional: it stays None when the reply could
# not be turned into the schema (e.g. truncated or invalid JSON). Stop here with a
# clear message instead of failing in a later cell with an AttributeError on None.
if response_compare_years.parsed is None:
    raise ValueError(
        "Gemini reply could not be parsed into AuditAnswers; "
        "inspect response_compare_years.text."
    )

audit_answers_compare_years: AuditAnswers = response_compare_years.parsed
audit_answers_compare_years

AuditAnswers(answers=[AuditAnswer(question="What is the company's revenue for each year?", answer="Ahold's net sales for 2015 was €38.2bn. Delhaize's revenues for 2015 was €24.4 B. Ahold Delhaize's net sales for 2016 was €49.7bn, for 2017 was €62.9bn, for 2018 was €62.89bn, for 2019 was €66.26bn, and for 2020 was €74.7bn.", sources=[AnswerSource(citation='In 2015, we achieved net sales of €38.2bn and generated free cash flow of €1.2bn', document='Ahold 2015 annual report', page=5), AnswerSource(citation='Revenues: €872 M Group underlying operating profit; €24.4 B Revenues', document='Delhaize 2015 annual report', page=5), AnswerSource(citation='Net sales €49,695 million', document='Ahold Delhaize 2016 annual report', page=8), AnswerSource(citation='Net sales €62.890 bn', document='Ahold Delhaize 2018 annual report', page=7), AnswerSource(citation='Net sales €66.260 bn', document='Ahold Delhaize 2019 annual report', page=6), AnswerSource(citation='Net sales €74.736 bn', document='Ahold 

In [44]:
# One row per comparison question; each model is dumped to a plain dict first,
# so the nested sources end up as lists of dicts in the "sources" column.
df_compare_years = pd.DataFrame(
    [entry.model_dump() for entry in audit_answers_compare_years.answers]
)
df_compare_years

Unnamed: 0,question,answer,sources
0,What is the company's revenue for each year?,Ahold's net sales for 2015 was €38.2bn. Delhai...,"[{'citation': 'In 2015, we achieved net sales ..."
1,Have there been any alterations to accounting ...,Ahold Delhaize's 2020 report mentions the impa...,[{'citation': 'Due to the implementation of IF...
2,How has the company's strategy changed over th...,"In 2015, Ahold's strategic pillars included in...",[{'citation': 'These six pillars help us achie...
3,Summarise the most important changes in the co...,The most significant change in the company's f...,"[{'citation': 'On July 24, 2016, the merger be..."
4,Summarise the most important strategic changes...,The most important strategic change was the me...,"[{'citation': 'On July 24, 2016, the merger be..."
5,What were the key performance indicators for A...,Key performance indicators for Ahold Delhaize ...,"[{'citation': 'Net sales €74.736 bn', 'documen..."
6,How did the key audit matters related to goodw...,"In 2015, the key audit matters included recogn...",[{'citation': 'Key audit matters: Recognition ...
7,How did the number of associates change from 2...,"The number of associates increased from 236,00...","[{'citation': 'Associates 236,000', 'document'..."
8,What was the dividend per common share in 2015?,The dividend per common share in 2015 was €0.52.,[{'citation': 'Dividend per common share €0.52...
9,Come up with 1 question that can only be answe...,How did the company's approach to sustainabili...,[]


## Cost estimates

Input is primarily images, since Gemini converts PDFs to images under the hood. To estimate the cost, we use $0.15 per 1M input tokens <sup> [\[1\]](https://cloud.google.com/skus?hl=en&filter=gemini%20Gemini%202.0%20Flash%20input%20vertex%20image&currency=EUR) [\[2\]](https://cloud.google.com/vertex-ai/generative-ai/pricing#token-based-pricing) </sup>.

Estimate cost for processing annual report of 2024:

In [42]:
# Token count -> millions of tokens -> price at 0.15 per 1M tokens.
# NOTE(review): this prices the *total* token count at the input-token rate;
# presumably the total also includes output tokens — confirm if precision matters.
(response.usage_metadata.total_token_count / 1_000_000) * 0.15

0.0150099

Estimate cost for all reports together (2,906 pages):

In [43]:
# Token count -> millions of tokens -> price at 0.15 per 1M tokens.
# NOTE(review): this prices the *total* token count at the input-token rate;
# presumably the total also includes output tokens — confirm if precision matters.
(response_compare_years.usage_metadata.total_token_count / 1_000_000) * 0.15

0.11434305

... this cost is _halved_ when predicting with the [Batch API](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/batch-prediction-gemini) instead.