In [1]:
import json
import pandas as pd

In [2]:
with open("results/stats_topics_settings.json", "r") as file:
    data = json.load(file)

with open("results/p_values_topics_settings.json", "r") as file:
    p_values = json.load(file)
    
# Convert to DataFrame
chat_stat_metrics = ["chat_duration_time", "EE_processing_times", "num_EE_queries", "num_EX_sentences", "num_EX_words"]
rows = []
for topic, conditions in data.items():
    for condition, values in conditions.items():
        for metric, value in values.items():
            if metric in chat_stat_metrics:
                value = value.replace("+-", "$\\pm$ ").replace("%", "\\%").replace("0:", "")
                if p_values[topic][metric] < 0.05:
                    value = value+"\\textsuperscript{$\\dagger$}"
                rows.append([metric.replace("_", ""), topic, condition, value])

df = pd.DataFrame(rows, columns=["Metric", "Topic", "Setting", "Value"])

# Pivot to create multi-column structure
df_pivot = df.pivot(index="Metric", columns=["Topic", "Setting"], values="Value")

desired_order = [
    "chatdurationtime", 
    "numEEqueries", 
    "EEprocessingtimes", 
    "numEXsentences", 
    "numEXwords"
]

# Reorder the columns
df_pivot = df_pivot.reindex(desired_order)

# Generate LaTeX table with multi-columns
latex_table = df_pivot.T.to_latex(escape=False, multirow=True, column_format="ll" + "l" * len(chat_stat_metrics))

# Print or save LaTeX table
print(latex_table)


with open("results/stats_settings.json", "r") as file:
    data = json.load(file)

with open("results/p_values_settings.json", "r") as file:
    p_values = json.load(file)
    
# Convert to DataFrame
chat_stat_metrics = ["chat_duration_time", "EE_processing_times", "num_EE_queries", "num_EX_sentences", "num_EX_words"]
rows = []
for condition, values in data.items():
    for metric, value in values.items():
        if metric in chat_stat_metrics:
            value = value.replace("+-", "$\\pm$ ").replace("%", "\\%").replace("0:", "")
            if p_values[metric] < 0.05:
                value = value+"\\textsuperscript{$\\dagger$}"
            rows.append([metric.replace("_", ""), "\\textbf{Overall}", condition, value])

df = pd.DataFrame(rows, columns=["Metric", "Topic", "Setting", "Value"])

# Pivot to create multi-column structure
df_pivot = df.pivot(index="Metric", columns=["Topic", "Setting"], values="Value")

desired_order = [
    "chatdurationtime", 
    "numEEqueries", 
    "EEprocessingtimes", 
    "numEXsentences", 
    "numEXwords"
]

# Reorder the columns
df_pivot = df_pivot.reindex(desired_order)

# Generate LaTeX table with multi-columns
latex_table = df_pivot.T.to_latex(escape=False, multirow=True, column_format="ll" + "l" * len(chat_stat_metrics))

# Print or save LaTeX table
print(latex_table)

\begin{tabular}{lllllll}
\toprule
 & Metric & chatdurationtime & numEEqueries & EEprocessingtimes & numEXsentences & numEXwords \\
Topic & Setting &  &  &  &  &  \\
\midrule
\multirow[t]{2}{*}{quarto} & base & 11:49 $\pm$ 216s\textsuperscript{$\dagger$} & 9.3 $\pm$ 4.4 & 01:36 $\pm$ 64s & 14.0 $\pm$ 4.3\textsuperscript{$\dagger$} & 15.8 $\pm$ 2.0\textsuperscript{$\dagger$} \\
 & enhanced & 13:07 $\pm$ 199s\textsuperscript{$\dagger$} & 10.5 $\pm$ 4.7 & 01:27 $\pm$ 40s & 10.4 $\pm$ 3.7\textsuperscript{$\dagger$} & 17.3 $\pm$ 2.5\textsuperscript{$\dagger$} \\
\cline{1-7}
\multirow[t]{2}{*}{sleep} & base & 12:05 $\pm$ 221s\textsuperscript{$\dagger$} & 6.9 $\pm$ 2.6\textsuperscript{$\dagger$} & 02:07 $\pm$ 73s & 19.3 $\pm$ 4.7\textsuperscript{$\dagger$} & 16.2 $\pm$ 3.2\textsuperscript{$\dagger$} \\
 & enhanced & 13:39 $\pm$ 161s\textsuperscript{$\dagger$} & 9.4 $\pm$ 3.5\textsuperscript{$\dagger$} & 01:47 $\pm$ 69s & 12.7 $\pm$ 3.6\textsuperscript{$\dagger$} & 17.2 $\pm$ 2.0\textsuperscrip

In [3]:
# Load the data
with open("results/stats_settings.json", "r") as file:
    data = json.load(file)

with open("results/p_values_settings.json", "r") as file:
    p_values = json.load(file)
    
# Define the metrics we're interested in
chat_stat_metrics = ["chat_time_span", "EE_read_time", "num_EE_queries", "num_EX_sentences", "num_EX_words"]
rows = []

# Prepare the data for the DataFrame
for condition, values in data.items():
    for metric, value in values.items():
        if metric in chat_stat_metrics:
            value = value.replace("+-", "$\\pm$ ").replace("%", "\\%").replace("0:", "")
            if p_values[metric] < 0.05:
                value = value + "\\textsuperscript{$\\dagger}"
            rows.append([metric.replace("_", " "), condition, value])

# Create DataFrame
df = pd.DataFrame(rows, columns=["Metric", "Setting", "Value"])

# Sort and reorder the DataFrame
desired_order = [
    "chat time span", 
    "num EE queries", 
    "EE read time", 
    "num EX sentences", 
    "num EX words"
]

# Reindex the DataFrame to follow the desired order
df["Metric"] = pd.Categorical(df["Metric"], categories=desired_order, ordered=True)
df = df.sort_values("Metric")

# Set up the DataFrame for multirow
df_pivot = df.pivot(index='Setting', columns='Metric', values='Value')

# Generate LaTeX table with multirow using `to_latex`
latex_table = df_pivot.to_latex(escape=False, multirow=True, column_format="l" + "l" * len(df_pivot.columns))

# Print LaTeX table
print(latex_table)

\begin{tabular}{llll}
\toprule
Metric & num EE queries & num EX sentences & num EX words \\
Setting &  &  &  \\
\midrule
base & 8.2 $\pm$ 3.8\textsuperscript{$\dagger} & 17.8 $\pm$ 5.3\textsuperscript{$\dagger} & 16.9 $\pm$ 2.6\textsuperscript{$\dagger} \\
enhanced & 9.3 $\pm$ 3.7\textsuperscript{$\dagger} & 12.1 $\pm$ 3.6\textsuperscript{$\dagger} & 17.8 $\pm$ 2.2\textsuperscript{$\dagger} \\
\bottomrule
\end{tabular}



In [4]:
with open("results/stats_topics_settings.json", "r") as file:
    data = json.load(file)

with open("results/p_values_topics_settings.json", "r") as file:
    p_values = json.load(file)

with open("results/p_values_topics_settings_pre_post.json", "r") as file:
    p_values_prior_post = json.load(file)
    
# Convert to DataFrame
chat_stat_metrics = ["pre_motivation", "post_motivation", "pre_subj_comprehension", "post_subj_comprehension", "post_obj_comprehension", "post_enabledness", "post_constructiveness"]
rows = []
for topic, conditions in data.items():
    for condition, values in conditions.items():
        for metric, value in values.items():
            if metric in chat_stat_metrics:
                value = value.replace("+-", "$\\pm$ ").replace("%", "\\%").replace("0:", "")
                metric_temp = metric.replace("pre_", "").replace("post_", "")
                if p_values[topic][metric] < 0.05 and metric_temp in p_values_prior_post[topic][condition] and p_values_prior_post[topic][condition][metric_temp] < 0.05:
                    value = value + "\\textsuperscript{$\\dagger\\ddagger$}"
                elif p_values[topic][metric] < 0.05:
                    value = value + "\\textsuperscript{$\\dagger$}"
                elif metric_temp in p_values_prior_post[topic][condition] and p_values_prior_post[topic][condition][metric_temp] < 0.05:
                    value = value + "\\textsuperscript{$\\ddagger$}"
                rows.append([metric.replace("_", ""), topic, condition, value])

df = pd.DataFrame(rows, columns=["Metric", "Topic", "Setting", "Value"])

# Pivot to create multi-column structure
df_pivot = df.pivot(index="Metric", columns=["Topic", "Setting"], values="Value")

desired_order = [
    "premotivation", 
    "postmotivation", 
    "presubjcomprehension", 
    "postsubjcomprehension", 
    "postobjcomprehension", 
    "postenabledness",
    "postconstructiveness"
]

# Reorder the columns
df_pivot = df_pivot.reindex(desired_order)

# Generate LaTeX table with multi-columns
latex_table = df_pivot.T.to_latex(escape=False, multirow=True, column_format="ll" + "l" * len(chat_stat_metrics))

# Print or save LaTeX table
print(latex_table)


with open("results/stats_settings.json", "r") as file:
    data = json.load(file)

with open("results/p_values_settings.json", "r") as file:
    p_values = json.load(file)

with open("results/p_values_settings_pre_post.json", "r") as file:
    p_values_prior_post = json.load(file)

# Convert to DataFrame
chat_stat_metrics = ["pre_motivation", "post_motivation", "pre_subj_comprehension", "post_subj_comprehension", "post_obj_comprehension", "post_enabledness", "post_constructiveness"]
rows = []
for condition, values in data.items():
    for metric, value in values.items():
        if metric in chat_stat_metrics:
            value = value.replace("+-", "$\\pm$ ").replace("%", "\\%").replace("0:", "")
            metric_temp = metric.replace("pre_", "").replace("post_", "")
            if p_values[metric] < 0.05 and metric_temp in p_values_prior_post[condition] and p_values_prior_post[condition][metric_temp] < 0.05:
                value = value + "\\textsuperscript{$\\dagger\\ddagger$}"
            elif p_values[metric] < 0.05:
                value = value + "\\textsuperscript{$\\dagger$}"
            elif metric_temp in p_values_prior_post[condition] and p_values_prior_post[condition][metric_temp] < 0.05:
                value = value + "\\textsuperscript{$\\ddagger$}"
            rows.append([metric.replace("_", ""), "\\textbf{Overall}", condition, value])

df = pd.DataFrame(rows, columns=["Metric", "Topic", "Setting", "Value"])

# Pivot to create multi-column structure
df_pivot = df.pivot(index="Metric", columns=["Topic", "Setting"], values="Value")

desired_order = [
    "premotivation", 
    "postmotivation", 
    "presubjcomprehension", 
    "postsubjcomprehension", 
    "postobjcomprehension", 
    "postenabledness",
    "postconstructiveness"
]

# Reorder the columns
df_pivot = df_pivot.reindex(desired_order)

# Generate LaTeX table with multi-columns
latex_table = df_pivot.T.to_latex(escape=False, multirow=True, column_format="ll" + "l" * len(chat_stat_metrics))

# Print or save LaTeX table
print(latex_table)

\begin{tabular}{lllllllll}
\toprule
 & Metric & premotivation & postmotivation & presubjcomprehension & postsubjcomprehension & postobjcomprehension & postenabledness & postconstructiveness \\
Topic & Setting &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{2}{*}{quarto} & base & 3.9 $\pm$ 0.5 & 4.1 $\pm$ 0.6 & 3.0 $\pm$ 0.7\textsuperscript{$\ddagger$} & 3.8 $\pm$ 0.6\textsuperscript{$\dagger\ddagger$} & 73.3\% $\pm$ 17.7 & 59.6\% $\pm$ 24.8 & 3.8 $\pm$ 0.6 \\
 & enhanced & 4.0 $\pm$ 0.6 & 4.1 $\pm$ 0.6 & 3.1 $\pm$ 0.8\textsuperscript{$\ddagger$} & 4.0 $\pm$ 0.6\textsuperscript{$\dagger\ddagger$} & 73.9\% $\pm$ 18.6 & 66.1\% $\pm$ 18.1 & 4.0 $\pm$ 0.7 \\
\cline{1-9}
\multirow[t]{2}{*}{sleep} & base & 3.6 $\pm$ 0.7\textsuperscript{$\ddagger$} & 4.0 $\pm$ 0.7\textsuperscript{$\ddagger$} & 2.3 $\pm$ 0.8\textsuperscript{$\ddagger$} & 3.9 $\pm$ 0.5\textsuperscript{$\ddagger$} & 75.7\% $\pm$ 15.6 & 65.1\% $\pm$ 22.8 & 3.8 $\pm$ 0.6\textsuperscript{$\dagger$} \\
 & enhanced & 3.5 $\pm$ 0.7\texts