In [None]:
import pandas as pd
import textstat
import seaborn as sns
import matplotlib.pyplot as plt
import aquarel

In [None]:
# Build one row per response holding the median Flesch reading-ease score of
# its statements (`response_readability`) and of the reference passages those
# statements cite (`reference_readability`), keyed by `kind` and `style`.
df = (
    pd.read_json("../data/artifacts/responses.jsonl.gz", lines=True)
    .loc[:, ["response", "kind", "style", "references_ids", "references_texts", "statements"]]
    # One row per statement. An empty `statements` list explodes to NaN, which
    # cannot be subscripted in the next step, so discard those rows first.
    .explode("statements")
    .dropna(subset=["statements"])
    .assign(
        text=lambda frame: frame["statements"].apply(lambda stmt: stmt["text"]),
        source=lambda frame: frame["statements"].apply(lambda stmt: stmt["citations"]),
    )
    # One row per (statement, citation) pair. Statements with an empty citation
    # list become NaN here and are removed by the dropna below, together with
    # any row that has a missing value in another column.
    .explode("source")
    .dropna()
    .assign(
        # Swap each cited id for the reference text stored at the same position
        # in the row's `references_texts` list.
        source=lambda frame: frame.apply(
            lambda row: row["references_texts"][row["references_ids"].index(row["source"])],
            axis=1,
        )
    )
    .loc[:, ["response", "kind", "style", "text", "source"]]
    .assign(
        response_readability=lambda frame: frame["text"].apply(textstat.flesch_reading_ease),
        reference_readability=lambda frame: frame["source"].apply(textstat.flesch_reading_ease),
    )
    # Aggregate only the two numeric score columns with the built-in reduction;
    # the string columns are either grouping keys or no longer needed.
    .groupby(["kind", "style", "response"])[["response_readability", "reference_readability"]]
    .median()
    .reset_index()
    # `response` was only needed to identify the groups.
    .drop(columns=["response"])
)
df

In [None]:
# Draw one KDE facet per `kind`, overlaying the distributions of the two
# readability columns of `df` (distinguished by the melted `variable` column).
readability_long = df.melt(id_vars=["kind", "style"])

theme = aquarel.load_theme("boxy_light").set_axes(
    top=False, left=False, right=False, bottom=True
)
with theme:
    g = sns.FacetGrid(
        data=readability_long,
        row="kind",
        sharex=True,
        aspect=3,
        height=3.64 / 2,
        legend_out=True,
    )
    g.map_dataframe(
        sns.kdeplot,
        x="value",
        hue="variable",
        common_norm=True,
        common_grid=True,
    )

# Give every facet the same 0-100 x-range.
g.set(xlim=(0, 100))

plt.tight_layout()
# NOTE(review): the output name is spelled "readibility"; kept as-is because
# renaming it would change the path of the generated file.
g.savefig("figure-readibility.pdf")
plt.show()
