In [1]:
import nest_asyncio

# Patch asyncio so an already-running event loop (such as the notebook kernel's)
# can be re-entered.
# NOTE(review): presumably required because a helper used below drives the loop
# itself (e.g. via run_until_complete) — confirm it is still needed.
nest_asyncio.apply()

In [None]:
from pastel.parsers import parse_assertion, parse_evidence, check_grammar
from pastel.helpers import load_images_from_directory

import logfire
import pandas as pd
from IPython.display import display, Markdown
import asyncio

from pastel.reference import GLOSSARY
import os

# pandas display settings for this notebook.
# Up to 100 rows are rendered before pandas truncates a frame.
pd.options.display.max_rows = 100
# None removes the cap on the number of columns shown.
pd.options.display.max_columns = None
# None leaves the overall display width to pandas instead of a fixed character count.
pd.options.display.width = None
# None disables truncation of long cell contents (e.g. full insight text).
pd.options.display.max_colwidth = None

# Configure Logfire with the token taken from the environment (None when the
# PYDANTIC_LOGFIRE_TOKEN variable is not set), then switch on its OpenAI and
# Anthropic instrumentation hooks, in that order.
logfire.configure(token=os.environ.get("PYDANTIC_LOGFIRE_TOKEN"))
for _enable_instrumentation in (logfire.instrument_openai, logfire.instrument_anthropic):
    _enable_instrumentation()

In [None]:
# Render the glossary text imported from pastel.reference as formatted Markdown.
glossary_markdown = Markdown(GLOSSARY)
display(glossary_markdown)

In [None]:
# Workbook locations, relative to the notebook's working directory.
LRS_PATH = "../data/lrs.xlsx"
INSIGHTS_PATH = "../data/Insights.xlsx"

# Load the loss-ratio workbook and the insights workbook into DataFrames.
lrs = pd.read_excel(LRS_PATH)
insights = pd.read_excel(INSIGHTS_PATH)

# Program names used to narrow the loss-ratio preview below.
sample_programnames = [
    "pdl6muw8mJl9DL7bVO40nFOroodOnSFBG5e7zw+nAW32k7BiKehq6oLHwyItBjfw",
    "zOh+1hfGBc0APeN39xBT9Gw3m4vCj5fzPe4I4Ix86FlsyGY2I2GRkqPVee1eQqQe",
]

display(Markdown("# Loss Ratio Data Set"))
# The file name shown is derived from the path that was actually loaded, so the
# label always matches the workbook on disk.
display(Markdown(f"**Spreadsheet:** {os.path.basename(LRS_PATH)}"))
# `@sample_programnames` makes DataFrame.query read the Python list defined above.
display(lrs.query("programname in @sample_programnames"))
display(Markdown("<br><br>"))

display(Markdown("# Insights Data Set"))
display(Markdown(f"**Filename:** {os.path.basename(INSIGHTS_PATH)}"))
# Every insight row is shown here. To restrict the view to the sample programs,
# display insights.query("programname in @sample_programnames") instead.
display(insights)

In [None]:
# Load the images stored in one data sub-directory and preview a single entry.
# NOTE(review): each loaded item is expected to expose `.image` and `.encoded`
# attributes — confirm against pastel.helpers.load_images_from_directory.
image_dir = "../data/0wvTBiDyAsazB20qDhyomFS505t1ko+Dne-eZsDtqddeA4-SnAMhg8Bctev9RGU1"
images = load_images_from_directory(image_dir)

index = -1  # -1 selects the last image in the loaded sequence

selected_image = images[index]
display(selected_image.image)

# Only the first 75 characters of the encoded payload are shown as a teaser.
encoded_preview = selected_image.encoded[:75]
display(Markdown("**Base64 Encoding**: *" + encoded_preview + " ...*"))

In [None]:
tasks = [asyncio.create_task(parse_assertion(insight)) for insight in insights["insight"]]
insights["conclusions"] = await asyncio.gather(*tasks)

In [None]:
tasks = [
    asyncio.create_task(parse_evidence(row.conclusions, row.insight))
    for row in insights.itertuples()
]
insights["pastels"] = await asyncio.gather(*tasks)

In [None]:
tasks = [asyncio.create_task(check_grammar(insight.strip())) for insight in insights["insight"]]
insights["grammar"] = await asyncio.gather(*tasks)

In [None]:
# Final results view: the program name, the original insight, and the two
# derived columns, rendered with left-aligned cells and left-aligned headers.
result_columns = ["programname", "insight", "pastels", "grammar"]
header_styles = [{"selector": "th", "props": [("text-align", "left")]}]
(
    insights[result_columns]
    .style.set_properties(**{"text-align": "left"})
    .set_table_styles(header_styles)
)