In [1]:
from google import genai
from google.genai import types
from pydantic import BaseModel
from typing import Literal
import pandas as pd
from pathlib import Path

In [2]:
client = genai.Client(
    vertexai=True,
    location='europe-west4'
)

In [28]:
email_text_1 = Path("../data/accident_reports/accident_report_1.eml").read_text()
print(email_text_1)

From: John Doe <john.doe@example.com>
Date: Thu, 14 Mar 2024 19:19:11 -0800
Subject: Insurance Claim for Accident
To: claims@superinsurance.com
Content-Type: text/plain; charset="UTF-8"

Hello,



A 30-year-old man was injured in a car accident. He was driving his car when another car hit him from the side. He was taken to the hospital with a broken leg. The man works as a lawyer.


Sincerely,
John Doe



In [36]:
def read_accident_report(report_path: Path) -> types.Part:
    return types.Part.from_bytes(
        data=report_path.read_bytes(),
        mime_type="text/plain",
    )

In [40]:
accident_report_files = [
    read_accident_report(report_path)
    for report_path in
    Path("../data/accident_reports").glob("*.eml")
]

In [41]:
class AccidentReport(BaseModel):
    age: int | None
    gender: str | None
    occupation: str | None
    injury: str | None
    body_affected: Literal["ears", "eyes", "legs", "arms", "head", "shoulder"] | None
    category: Literal["accident", "crime", "natural_disaster"] | None

In [42]:
def extract_accident_report(accident_report: types.Part) -> AccidentReport:
    response = client.models.generate_content(
        model='gemini-2.0-flash-001',
        contents=[
            "Find below a customer insurance claim for an accident.",
            accident_report,
        ],
        config=types.GenerateContentConfig(
            temperature=0.2,
            response_mime_type='application/json',
            response_schema=AccidentReport,
        ),
    )
    accident_report: AccidentReport = response.parsed
    return accident_report

In [43]:
accident_reports = [
    extract_accident_report(accident_report) for accident_report in accident_report_files
]
accident_reports

[AccidentReport(age=30, gender='male', occupation='lawyer', injury='broken leg', body_affected='legs', category='accident'),
 AccidentReport(age=None, gender='female', occupation='surgeon', injury='broken arm', body_affected='arms', category='accident'),
 AccidentReport(age=58, gender=None, occupation='teacher', injury='cut wounds', body_affected='shoulder', category='natural_disaster'),
 AccidentReport(age=10, gender='male', occupation='student', injury='fracture', body_affected='arms', category='accident'),
 AccidentReport(age=45, gender=None, occupation='heavy machinery operator', injury='severe crush injuries', body_affected='legs', category='accident'),
 AccidentReport(age=30, gender='male', occupation='lawyer', injury='broken leg', body_affected='legs', category='accident'),
 AccidentReport(age=None, gender='female', occupation='surgeon', injury='broken arm', body_affected='arms', category='accident'),
 AccidentReport(age=58, gender='male', occupation='teacher', injury='cut wound

In [44]:
pd.DataFrame([accident_report.model_dump() for accident_report in accident_reports])

Unnamed: 0,age,gender,occupation,injury,body_affected,category
0,30.0,male,lawyer,broken leg,legs,accident
1,,female,surgeon,broken arm,arms,accident
2,58.0,,teacher,cut wounds,shoulder,natural_disaster
3,10.0,male,student,fracture,arms,accident
4,45.0,,heavy machinery operator,severe crush injuries,legs,accident
5,30.0,male,lawyer,broken leg,legs,accident
6,,female,surgeon,broken arm,arms,accident
7,58.0,male,teacher,cut wounds,shoulder,natural_disaster
8,10.0,male,student,fractured wrist,arms,accident
9,45.0,,heavy machinery operator,crush injuries,legs,accident
