In [None]:
from math import nan

from pandas import read_excel, DataFrame, NamedAgg
from scipy.stats import kendalltau
from seaborn import FacetGrid, histplot, set_theme

In [None]:
# Load the "05 Quality" sheet of the study workbook, give the numbered
# questionnaire columns descriptive names, and reduce the frame to the
# identifier and score columns that are used further down.
df: DataFrame = (
    read_excel("../data/06-study.xlsx", sheet_name="05 Quality", header=0)
    .rename(columns={
        "1.1.": "qid",
        "1.2.": "docno",
        "1.3.": "query",
        "1.4.": "description",
        "Relevanzkriterium": "narrative",
        "1.5.": "url",
        "1.6.": "object1",
        "1.7.": "object2",
        "2.1.": "stance_mentioned_objects",
        "2.1.A.": "stance_mentioned_object1",
        "2.1.B.": "stance_mentioned_object2",
        "2.2.": "stance_most_detailed_objects",
        "2.2.A.": "stance_most_detailed_object1",
        "2.2.B.": "stance_most_detailed_object2",
        "2.3.": "stance_favored_objects",
        "2.3.1.": "stance_strength",
        "2.3.A.": "stance_favored_object1",
        "2.3.B.": "stance_favored_object2",
        "I-Haltung": "stance_score",
        "3.1.": "content_information_depth",
        "3.2.": "content_wordiness",
        "3.2.score": "content_wordiness_score",
        "3.3.": "content_rhetoric",
        "3.3.score": "content_rhetoric_score",
        "I-Inhalt": "content_score",
        "4.1.1.": "usability_design_elements_text",
        "4.1.2.": "usability_design_elements_tables",
        "4.1.3.": "usability_design_elements_pictures_graphics",
        "4.1.4.": "usability_design_elements_key_points",
        "4.1.5.": "usability_design_elements_question_answer",
        "4.1.6.": "usability_design_elements_interviews",
        "4.1.7.": "usability_design_elements_animations_interactive_media",
        "4.1.99.": "usability_design_elements_none",
        "4.2.": "usability_readability",
        "4.2.score": "usability_readability_score",
        "I-Nutzbarkeit": "usability_score",
        "5.1.": "credibility_source",
        "5.1.score": "credibility_source_score",
        "5.2.": "credibility_author",
        "5.2.score": "credibility_author_score",
        "5.3.": "credibility_truthfulness",
        "5.3.score": "credibility_truthfulness_score",
        "5.4.": "credibility_verifiability",
        "I-Glaubwürdigkeit": "credibility_score",
        "6.1.": "up_to_dateness_date",
        "6.1.score": "up_to_dateness_date_score",
        "6.2.": "up_to_dateness_updates",
        "6.2.score": "up_to_dateness_updates_score",
        "I-Aktualität": "up_to_dateness_score",
        "6.3.": "???",
        "7": "relevance",
        "7.1.": "relevance_score",
        "I-Qualität": "quality_score",
    })
    # Item-level scores that have no dedicated "score" column in the sheet.
    # They are part of the drop list below; remove them from that list to
    # keep them in the frame.
    .assign(
        content_information_depth_score=lambda frame: frame["content_information_depth"],
        # Sum of the design-element columns; animations/interactive media
        # are subtracted instead of added.
        usability_design_elements_score=lambda frame: (
            frame["usability_design_elements_text"]
            + frame["usability_design_elements_tables"]
            + frame["usability_design_elements_pictures_graphics"]
            + frame["usability_design_elements_key_points"]
            + frame["usability_design_elements_question_answer"]
            + frame["usability_design_elements_interviews"]
            - frame["usability_design_elements_animations_interactive_media"]
        ),
        credibility_verifiability_score=lambda frame: frame["credibility_verifiability"],
    )
    # Drop topic metadata and item-level answers/scores.
    # Deliberately NOT dropped: "stance_strength", "relevance", "relevance_score".
    .drop(columns=[
        "query",
        "description",
        "narrative",
        "url",
        "object1",
        "object2",
        "stance_mentioned_objects",
        "stance_mentioned_object1",
        "stance_mentioned_object2",
        "stance_most_detailed_objects",
        "stance_most_detailed_object1",
        "stance_most_detailed_object2",
        "stance_favored_objects",
        "stance_favored_object1",
        "stance_favored_object2",
        "content_information_depth",
        "content_information_depth_score",
        "content_wordiness",
        "content_wordiness_score",
        "content_rhetoric",
        "content_rhetoric_score",
        "usability_design_elements_text",
        "usability_design_elements_tables",
        "usability_design_elements_pictures_graphics",
        "usability_design_elements_key_points",
        "usability_design_elements_question_answer",
        "usability_design_elements_interviews",
        "usability_design_elements_animations_interactive_media",
        "usability_design_elements_none",
        "usability_design_elements_score",
        "usability_readability",
        "usability_readability_score",
        "credibility_source",
        "credibility_source_score",
        "credibility_author",
        "credibility_author_score",
        "credibility_truthfulness",
        "credibility_truthfulness_score",
        "credibility_verifiability",
        "credibility_verifiability_score",
        "up_to_dateness_date",
        "up_to_dateness_date_score",
        "up_to_dateness_updates",
        "up_to_dateness_updates_score",
        "???",
    ])
    # Dense, ascending rank of "docno" within each "qid", stored as int.
    .assign(
        rank=lambda frame: (
            frame.groupby("qid")["docno"]
            .rank(method="dense", ascending=True)
            .astype(int)
        ),
    )
)
df

In [None]:
# Kendall's tau between rank and quality score, restricted to rows that have a
# quality score. The rank is negated, so a positive tau means that smaller
# rank numbers go together with higher quality scores.
df_corr = df.dropna(subset=["quality_score"])
kendalltau(-df_corr["rank"], df_corr["quality_score"])

In [None]:
# Kendall's tau between rank and relevance score, restricted to rows that have
# a relevance score. The rank is negated, so a positive tau means that smaller
# rank numbers go together with higher relevance scores.
df_corr = df.dropna(subset=["relevance_score"])
kendalltau(-df_corr["rank"], df_corr["relevance_score"])

In [None]:
# Kendall's tau between quality and relevance score, using only rows where
# both scores are present.
df_corr = df.dropna(subset=["quality_score", "relevance_score"])
kendalltau(df_corr["quality_score"], df_corr["relevance_score"])

In [None]:
# Topic ids (qid) that have at least one row without a quality score /
# without a stance score.
no_quality_topics = df.loc[df["quality_score"].isna(), "qid"].unique()
no_stance_topics = df.loc[df["stance_score"].isna(), "qid"].unique()

In [None]:
# Per-topic mean and standard deviation of the quality and stance scores.
df_topics = (
    df.groupby("qid")
    .agg(
        quality_score_mean=("quality_score", "mean"),
        quality_score_std=("quality_score", "std"),
        stance_score_mean=("stance_score", "mean"),
        stance_score_std=("stance_score", "std"),
    )
    .reset_index()
)
# Topics with at least one unscored row get NaN statistics instead of
# statistics computed from the remaining rows only.
df_topics.loc[
    df_topics["qid"].isin(no_quality_topics),
    ["quality_score_mean", "quality_score_std"],
] = nan
df_topics.loc[
    df_topics["qid"].isin(no_stance_topics),
    ["stance_score_mean", "stance_score_std"],
] = nan
# Highest mean quality first (ties broken by mean stance), rounded for display.
df_topics = df_topics.sort_values(
    by=["quality_score_mean", "stance_score_mean"],
    ascending=False,
).round(decimals=2)
df_topics

In [None]:
# Median quality score over all rows (missing values are skipped).
# Copy this value into the user study evaluation notebook, where it serves as
# the quality threshold.
df["quality_score"].median(skipna=True)

In [None]:
# Score columns to plot, mapped to the human-readable name used in the facet titles.
cols_plot = {
    "content_score": "Content",
    "usability_score": "Usability",
    "credibility_score": "Credibility",
    "up_to_dateness_score": "Up-to-dateness",
    "quality_score": "Quality",
}
# Long format: one row per (qid, docno, score column), with the column name in
# "variable" and the score in "value".
df_plot = df.melt(
    id_vars=["qid", "docno"],
    value_vars=list(cols_plot),
)
# Build the facet title for every score column; each title carries the
# column's summary statistics computed on the wide frame.
facet_titles = {}
for col, col_name in cols_plot.items():
    col_mean = df[col].mean().round(decimals=2)
    col_median = df[col].median().round(decimals=2)
    col_std = df[col].std().round(decimals=2)
    facet_titles[col] = f"{col_name}\n(mean: {col_mean}, std: {col_std}\nmedian: {col_median})"
# Assign the replaced column back instead of calling
# `df_plot["variable"].replace(..., inplace=True)`: an in-place method on a
# column selection is chained assignment, which only modifies a temporary
# object under pandas Copy-on-Write and would leave `df_plot` unchanged.
df_plot["variable"] = df_plot["variable"].replace(facet_titles)
df_plot

In [None]:
# Global seaborn styling for the figure.
set_theme(style="ticks", palette="colorblind", font_scale=1.0, font="sans-serif")

# One histogram facet per score; the facet title is the label stored in the
# "variable" column of the long-format frame.
plot = FacetGrid(data=df_plot, col="variable")
plot.map_dataframe(histplot, x="value")
plot.set_titles(template="{col_name}").set_xlabels(label="Score")

# Persist the figure as PDF, then show the grid as the cell output.
plot.savefig("../data/figures/quality-score-distributions.pdf")
plot