In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from scipy.stats import spearmanr
import numpy as np

def jaccard(list1, list2):
    """Return the Jaccard similarity of two collections, treated as sets.

    The similarity is ``|A ∩ B| / |A ∪ B|`` where ``A`` and ``B`` are the
    de-duplicated elements of ``list1`` and ``list2``.

    Parameters
    ----------
    list1, list2 : iterable of hashable
        The two collections to compare. Duplicates and ordering are ignored.
        Each iterable is consumed exactly once, so one-shot iterators work.

    Returns
    -------
    float
        A value in ``[0.0, 1.0]``. When both inputs are empty the ratio is
        undefined; ``0.0`` is returned in that case instead of raising
        ``ZeroDivisionError``.
    """
    first, second = set(list1), set(list2)
    union_size = len(first | second)
    if union_size == 0:
        # Both inputs are empty: nothing is shared, report zero overlap.
        return 0.0
    return len(first & second) / union_size

# Summary table of citation overlap, ordering correlation and citation-list
# size, aggregated per writing style. The result of the chain is the cell's
# displayed value; nothing is assigned.
(
    # Load the responses and keep only the columns used below.
    pd.read_json("../data/artifacts/responses.jsonl.gz", lines=True)
    .loc[:, ["response", "style", "kind", "topic", "statements"]]
    # Attach the retrieved reference ids of each topic (left join on topic).
     .merge(
        (
            pd.read_json("../data/raw/study1_retrieval.jsonl.gz", lines=True)
            .loc[:, ["topic", "references_ids"]]
            .rename(columns={"references_ids": "retrieved_ids"})
        ),
        on="topic",
        how="left"
    )
    # Flatten the citations of all statements of a response into one list;
    # dict.fromkeys removes duplicates while keeping first-occurrence order.
    .assign(cited_ids=lambda df: df["statements"].apply(lambda cell: list(dict.fromkeys([y for x in cell for y in x["citations"]]).keys())))
    .loc[:, ["topic", "style", "kind", "retrieved_ids", "cited_ids"]]
    # Collapse every (topic, style) group into a single row that holds the
    # retrieved ids next to the human and the LLM citation lists.
    # NOTE(review): `.values[0]` takes the first matching row and raises
    # IndexError if a group has no "human" or no "llm" row -- confirm every
    # (topic, style) pair has both kinds.
    .groupby(["topic", "style"])
    .apply(lambda group: pd.Series({
        "retrieved_ids": group["retrieved_ids"].values[0],
        "cited_ids_human": group.loc[group["kind"] == "human", "cited_ids"].values[0],
        "cited_ids_llm": group.loc[group["kind"] == "llm", "cited_ids"].values[0],
    }))
    # New columns are named <measure>_<first>_<second>; the name is split on
    # "_" further down, which is why the sizes are called size_human_human /
    # size_llm_llm rather than size_human / size_llm.
    .assign(
        # Set overlap (Jaccard) between each pair of id lists.
        overlap_human_retrieved=lambda df: df.apply(lambda row: jaccard(row["cited_ids_human"], row["retrieved_ids"]), axis=1),
        overlap_llm_retrieved=lambda df: df.apply(lambda row: jaccard(row["cited_ids_llm"], row["retrieved_ids"]), axis=1),
        overlap_human_llm=lambda df: df.apply(lambda row: jaccard(row["cited_ids_human"], row["cited_ids_llm"]), axis=1),
        # Spearman correlation ([0] is the coefficient) between the cited ids
        # in citation order and the same ids in the order of the other list.
        # spearmanr is applied to the id values themselves.
        # NOTE(review): in the *_retrieved variants the first argument is the
        # full cited list, the second is retrieved_ids filtered to cited ids;
        # the two only have equal length if every cited id also occurs in
        # retrieved_ids (and retrieved_ids has no duplicates) -- TODO confirm.
        correlation_human_retrieved=lambda df: df.apply(lambda row: spearmanr(row["cited_ids_human"], [x for x in row["retrieved_ids"] if x in row["cited_ids_human"]])[0], axis=1),
        correlation_llm_retrieved=lambda df: df.apply(lambda row: spearmanr(row["cited_ids_llm"], [x for x in row["retrieved_ids"] if x in row["cited_ids_llm"]])[0], axis=1),
        # Here both sides are restricted to the ids cited by human AND llm.
        correlation_human_llm=lambda df: df.apply(lambda row: spearmanr([x for x in row["cited_ids_human"] if x in row["cited_ids_llm"]], [x for x in row["cited_ids_llm"] if x in row["cited_ids_human"]])[0], axis=1),
        # Number of distinct ids cited per response.
        size_human_human=lambda df: df["cited_ids_human"].apply(len),
        size_llm_llm=lambda df: df["cited_ids_llm"].apply(len),
    )
    .reset_index()
    # Only the numeric measures and the style key are needed from here on.
    .drop(columns=["cited_ids_human", "cited_ids_llm", "retrieved_ids", "topic"])
    # Descriptive statistics per style; after the transpose and reset_index
    # the measure name lands in "level_0" and the statistic in "level_1",
    # with one column per style.
    .groupby("style")
    .describe()
    .transpose()
    .reset_index()
    # Split "<measure>_<first>_<second>" into three key columns.
    .assign(
        measure=lambda df: df["level_0"].apply(lambda s: s.split("_")[0]),
        first=lambda df: df["level_0"].apply(lambda s: s.split("_")[1]),
        second=lambda df: df["level_0"].apply(lambda s: s.split("_")[2]),
    )
    .drop(columns="level_0")
    # Long form: the per-style columns become rows. With no var_name given,
    # melt names the variable column after the columns' index name, which is
    # where the "style" column used by the pivot below comes from.
    .melt(id_vars=["measure", "first", "second", "level_1"])
    .query("level_1 in ['mean', 'std', 'count']")
    # One row per (style, measure, first, second), one column per statistic.
    .pivot(index=["style", "measure", "first", "second"], columns=["level_1"], values="value")
    # Disabled: 95% confidence half-width (1.96 * std / sqrt(count)).
    #.assign(pm=lambda df: 1.96*df["std"]/df["count"].apply(np.sqrt))
    .loc[:, ["mean", "std"]]
    .reset_index()
    # Reshape once more so the styles sit side by side as (style, statistic)
    # column pairs, one row per measure and list pair.
    .melt(id_vars=["style", "measure", "first", "second"])
    .pivot(index=["measure", "first", "second"], columns=["style", "level_1"], values="value")
    # Fixed column order; raises KeyError if any of the three styles
    # "bullet", "essay", "news" is absent from the data.
    .loc[:, [("bullet", "mean"), ("bullet", "std"), ("essay", "mean"), ("essay", "std"), ("news", "mean"), ("news", "std")]]
    .reset_index()
    .round(2)
)

In [None]:
from sacrebleu import BLEU

# Case-insensitive BLEU with n-gram orders up to 8. effective_order=True is
# the setting sacrebleu asks for when scoring single sentences
# (NOTE(review): per the sacrebleu docs -- confirm for the installed version).
metric = BLEU(lowercase=True, max_ngram_order=8, effective_order=True)

# bleu_data: one row per (statement, cited reference) with the BLEU score of
# the statement text against that reference, indexed by
# (topic, style, kind, response).
bleu_data = (
    pd.read_json("../data/artifacts/responses.jsonl.gz", lines=True)
    .loc[:, ["response", "style", "kind", "topic", "statements"]]
    # One row per statement of each response.
    .explode("statements")
    .set_index(["response", "style", "kind", "topic"])
    # Pull the statement text and its citation list out of the statement dict.
    .assign(
        original_text=lambda df: df["statements"].apply(lambda c: c["text"]), 
        citations=lambda df: df["statements"].apply(lambda c: c["citations"]) 
    )
    # One row per (statement, cited id).
    .explode("citations")
    .reset_index()
    # Look up the text of each cited reference by id (left join, so rows
    # without a matching reference are kept).
    .merge(
        (
            pd.read_json("../data/raw/study1_retrieval.jsonl.gz", lines=True)
            .loc[:, ["references_ids", "references_texts"]]
            # The id and text lists are exploded in lockstep, then repeated
            # (id, text) pairs are removed.
            .explode(["references_ids", "references_texts"])
            .drop_duplicates()
            .rename(columns={"references_ids": "citations"})
        ),
        on="citations",
        how="left"
    )
    .drop(columns=["statements", "citations"])
    .set_index(["topic", "style", "kind", "response"])
    # Rows without a reference text get "" so that .split below still works.
    .fillna("")
    # Score the statement (hypothesis) against the reference text, with each
    # line of the reference text passed as a separate reference.
    .apply(lambda row: metric.sentence_score(row["original_text"], row["references_texts"].split("\n")), axis=1)
    # Unpack the score object into two columns: the BLEU score and its
    # `counts` attribute (presumably matched n-gram counts per order --
    # verify against the sacrebleu BLEUScore documentation).
    .apply(lambda row: pd.Series({"score": row.score, "counts": row.counts}))
)

In [None]:
# Two-panel ECDF of the per-citation BLEU scores, split by response kind:
# LLM responses in the narrow left panel, human responses in the wide right
# panel. The y-axis is shared, the x-axes are independent.
fig, ax = plt.subplots(
    ncols=2,
    figsize=(3.64, 1.7),
    width_ratios=(1, 4),
    sharex=False,
    sharey=True,
)

# (kind, panel index, draw a legend?) -- only the human panel gets a legend.
for kind_name, panel, with_legend in (("human", 1, True), ("llm", 0, False)):
    sns.ecdfplot(
        bleu_data.reset_index().query(f"kind == '{kind_name}'"),
        x="score",
        hue="style",
        ax=ax[panel],
        legend=with_legend,
    )

# Each panel shows its own score range.
ax[0].set_xlim(0, 25)
ax[1].set_xlim(0, 100)

sns.despine()
plt.show()

fig.savefig("citations-bleu-score.pdf")

In [None]:
# Mean matched n-gram count per n-gram order, one line per (kind, style).
# The `counts` list of every row is spread into columns 1..N, averaged per
# (style, kind), and melted to long form ("variable" = order, "value" = mean).
sns.lineplot(
    data=(
        bleu_data
        .reset_index()[["topic", "style", "kind", "counts"]]
        .set_index(["topic", "style", "kind"])
        .apply(
            # record holds a single cell: the list of counts for this row.
            lambda record: pd.Series(dict(enumerate(record.values[0], start=1))),
            axis=1,
        )
        .groupby(["style", "kind"])
        .mean()
        .reset_index()
        .melt(id_vars=["style", "kind"])
    ),
    x="variable",
    y="value",
    hue="kind",
    style="style",
)

In [None]:
# Where in a response do citations occur? One facet per style, showing a KDE
# of the relative position of every statement that carries a citation.
g = sns.FacetGrid(
    data=(
        pd.read_json("../data/artifacts/responses.jsonl.gz", lines=True)
        .loc[:, ["response", "topic", "style", "kind", "statements"]]
        .assign(
            # Character length of each statement of a response.
            statement_lengths=lambda df: (
                df["statements"]
                .apply(lambda cell: [len(s["text"]) for s in cell])
            ),
            # Number of citations attached to each statement.
            statement_references=lambda df: (
                df["statements"]
                .apply(lambda cell: [len(s["citations"]) for s in cell])
            )
        )
        .assign(
            # Cumulative share of the response's total statement length
            # reached at the END of each statement; the last statement is
            # therefore at 1.0.
            # NOTE(review): np.sum(cell) is 0 when a response has no
            # statement text, which makes the division undefined -- confirm
            # this cannot occur in the data.
            statement_lengths_normalized=lambda df: (
                df["statement_lengths"]
                .apply(lambda cell: np.cumsum(cell) / np.sum(cell))
            )
        )
        .drop(columns=["statements", "response", "topic", "statement_lengths"])
        # One row per statement; both per-statement lists are exploded together.
        .explode(["statement_references", "statement_lengths_normalized"])
        # Keep only statements that cite at least one reference.
        .query("(statement_references != 0)")
        .rename(columns={"statement_lengths_normalized": "Relative Position"})
    ),
    col="style",
    legend_out=False,
    aspect=0.5,
    height=3.64
)
# Draw one density curve per response kind inside every style facet.
g.map_dataframe(sns.kdeplot, x="Relative Position", hue="kind")

# Restrict every facet to the valid [0, 1] range of a relative position.
# FacetGrid.set applies the setter to each Axes of the grid, so this does not
# depend on the 2-D layout of `g.axes` or on the facets sharing their x-axis
# (the previous loop over the rows of `g.axes` only touched the first column
# and also overwrote the notebook-level name `ax`).
g.set(xlim=(0, 1))

g.add_legend()
g.savefig("citations-position.pdf")
plt.show()