In [None]:
import pandas as pd
import sys
from pathlib import Path
import ipynbname
# Base directory for every data path built below: the parent of the folder
# that contains this notebook (``parents[1]`` of the resolved notebook path).
# NOTE(review): assumes ``ipynbname.path()`` returns the path of the running
# notebook file, so this presumably only works inside a live Jupyter session — confirm.
ROOT = Path(ipynbname.path()).resolve().parents[1]

In [18]:
import pandas as pd

# Display names of the embedding models, keyed by the lowercase model identifier
embedding_names = dict(
    albert="ALBERT",
    bart="BART",
    bert="BERT",
    distilbert="DistilBERT",
    minilm="MiniLM",
    roberta="RoBERTa",
)

# Model identifiers, in the insertion order of the mapping above
models = [*embedding_names]

# Format one table cell as LaTeX math
def format_latex(mean, std=None):
    """Render a mean and an optional standard deviation as a LaTeX table cell.

    Args:
        mean: Value to display, formatted with two decimals.
        std: Optional spread; when it is None or NaN only the mean is shown.

    Returns:
        "--" when ``mean`` is None or NaN, "$<mean>$" when ``std`` is missing,
        otherwise "$<mean>\\pm<std>$".
    """
    # A missing mean used to be rendered as "$nan$" (and None raised a
    # TypeError); emit the same "--" placeholder the table uses for empty cells.
    if mean is None or pd.isna(mean):
        return "--"
    if std is None or pd.isna(std):
        return f"${mean:.2f}$"
    return f"${mean:.2f}\\pm{std:.2f}$"

# Scenarios to tabulate: short key (used in the LaTeX label) -> text shown in the caption
scenarios = {"si_si": "(S1, I1) $\\leftrightarrow$ (S2, I2)"}

# ========================
# Table generation
# ========================
# For every scenario, load the per-model feature-importance CSVs, aggregate
# Precision / Recall / F1 per (family, embedding) and print a LaTeX table with
# one three-row group per family plus a final "Embedding Mean" group.

# Metrics reported for every row group, in display order. The \multirow span
# is derived from this list so the two can never drift apart.
METRICS = ["Precision", "Recall", "F1"]

for abr, scenario in scenarios.items():
    # Load one CSV per embedding model and tag its rows with the display name.
    # NOTE(review): the path does not depend on the scenario key, so every
    # scenario reads the same files — confirm this is intended.
    all_results = []
    for model in models:
        path = ROOT / f'RQ3_Feature_importance/{model}/feature_importance.csv'
        df = pd.read_csv(path)
        df["Embedding"] = embedding_names[model]
        all_results.append(df)

    df_all = pd.concat(all_results)

    families = df_all["Family"].unique()
    embeddings = list(embedding_names.values())

    # Rows of each embedding, filtered once and reused for every metric
    # of the "Embedding Mean" group.
    emb_frames = [df_all[df_all["Embedding"] == emb] for emb in embeddings]

    # =====================
    # Build the LaTeX
    # =====================
    # This table has no per-family (row-wise) mean column, so the caption
    # only announces the column-wise "Embedding Mean" rows.
    header = (
        "\\begin{table}[!ht]\n"
        "\\footnotesize\n"
        "\\centering\n"
        f"\\caption{{Precision, Recall, and F1-score per family and embedding model for scenario {scenario}. Column-wise means are included.}}\n"
        f"\\label{{tab:family_prec_rec_f1_{abr}}}\n"
        "\\resizebox{\\textwidth}{!}{%\n"
        "\\begin{tabular}{l l " + "c" * len(embeddings) + "}\n"
        "\\toprule\n"
        "\\textbf{Family} & \\textbf{Metric} & "
        + " & ".join(embeddings)
        + " \\\\\n"
        "\\midrule\n"
    )

    # Opening of the \multirow cell that labels the first row of each group.
    multirow_open = "\\multirow{" + str(len(METRICS)) + "}{*}"

    body_rows = []
    for fam in families:
        sub = df_all[df_all["Family"] == fam]
        for i, metric in enumerate(METRICS):
            # Only the first metric row of a family carries the family label.
            label = f"{multirow_open}{{{fam}}}" if i == 0 else ""
            values = []
            for emb in embeddings:
                val = sub[sub["Embedding"] == emb]
                if val.empty:
                    # No rows for this (family, embedding) pair.
                    values.append("--")
                else:
                    values.append(format_latex(val[metric].mean(), val[metric].std()))
            body_rows.append(f"{label} & {metric} & " + " & ".join(values) + " \\\\\n")
        body_rows.append("\\midrule\n")

    # === Embedding Mean group: mean/std over all families, per embedding ===
    for i, metric in enumerate(METRICS):
        label = multirow_open + "{\\textbf{Embedding Mean}}" if i == 0 else ""
        values = [
            format_latex(frame[metric].mean(), frame[metric].std())
            for frame in emb_frames
        ]
        body_rows.append(f"{label} & {metric} & " + " & ".join(values) + " \\\\\n")
    body_rows.append("\\bottomrule\n")

    body = "".join(body_rows)

    footer = "\\end{tabular}\n}\n\\end{table}\n"

    latex_code = header + body + footer
    print("\n\n")
    print(latex_code)





\begin{table}[!ht]
\footnotesize
\centering
\caption{Precision, Recall, and F1-score per family and embedding model for scenario (S1, I1) $\leftrightarrow$ (S2, I2). Row- and column-wise means are included.}
\label{tab:family_prec_rec_f1_si_si}
\resizebox{\textwidth}{!}{%
\begin{tabular}{l l cccccc}
\toprule
\textbf{Family} & \textbf{Metric} & ALBERT & BART & BERT & DistilBERT & MiniLM & RoBERTa \\
\midrule
\multirow{3}{*}{Topology} & Precision & $0.63\pm0.06$ & $0.61\pm0.07$ & $0.63\pm0.05$ & $0.63\pm0.08$ & $0.62\pm0.07$ & $0.63\pm0.09$ \\
 & Recall & $0.43\pm0.04$ & $0.43\pm0.03$ & $0.42\pm0.03$ & $0.42\pm0.04$ & $0.43\pm0.04$ & $0.43\pm0.04$ \\
 & F1 & $0.51\pm0.03$ & $0.50\pm0.03$ & $0.50\pm0.03$ & $0.50\pm0.04$ & $0.50\pm0.03$ & $0.51\pm0.04$ \\
\midrule
\multirow{3}{*}{Spectral} & Precision & $0.29\pm0.04$ & $0.29\pm0.04$ & $0.29\pm0.03$ & $0.30\pm0.04$ & $0.30\pm0.04$ & $0.29\pm0.05$ \\
 & Recall & $0.20\pm0.03$ & $0.21\pm0.03$ & $0.20\pm0.03$ & $0.19\pm0.03$ & $0.21\pm0.03$

In [14]:
import pandas as pd

# Display names of the embedding models, keyed by the lowercase model identifier
embedding_names = dict(
    albert="ALBERT",
    bart="BART",
    bert="BERT",
    distilbert="DistilBERT",
    minilm="MiniLM",
    roberta="RoBERTa",
)

# Model identifiers, in the insertion order of the mapping above
models = [*embedding_names]

# Format one table cell as LaTeX math
def format_latex(mean, std=None):
    """Render a mean and an optional standard deviation as a LaTeX table cell.

    Args:
        mean: Value to display, formatted with two decimals.
        std: Optional spread; when it is None or NaN only the mean is shown.

    Returns:
        "--" when ``mean`` is None or NaN, "$<mean>$" when ``std`` is missing,
        otherwise "$<mean>\\pm<std>$".
    """
    # A missing mean used to be rendered as "$nan$" (and None raised a
    # TypeError); emit the same "--" placeholder the table uses for empty cells.
    if mean is None or pd.isna(mean):
        return "--"
    if std is None or pd.isna(std):
        return f"${mean:.2f}$"
    return f"${mean:.2f}\\pm{std:.2f}$"

# Scenarios to tabulate: short key (used in the LaTeX label) -> text shown in the caption
scenarios = {"si_si": "(S1, I1) $\\leftrightarrow$ (S2, I2)"}

# ========================
# Table generation
# ========================
# For every scenario, load the per-model feature-importance CSVs, aggregate
# Precision / Recall / F1 per (family, embedding) and print a LaTeX table with
# one three-row group per family, a trailing "Family Mean" column and a final
# "Embedding Mean" group.

# Metrics reported for every row group, in display order. The \multirow span
# is derived from this list so the two can never drift apart.
METRICS = ["Precision", "Recall", "F1"]

for abr, scenario in scenarios.items():
    # Load one CSV per embedding model and tag its rows with the display name.
    # NOTE(review): the path does not depend on the scenario key, so every
    # scenario reads the same files — confirm this is intended.
    all_results = []
    for model in models:
        path = ROOT / f"RQ3_Feature_importance/{model}/feature_importance.csv"
        df = pd.read_csv(path)
        df["Embedding"] = embedding_names[model]
        all_results.append(df)

    df_all = pd.concat(all_results)

    families = df_all["Family"].unique()
    embeddings = list(embedding_names.values())

    # Rows of each embedding, filtered once and reused for every metric
    # of the "Embedding Mean" group.
    emb_frames = [df_all[df_all["Embedding"] == emb] for emb in embeddings]

    # =====================
    # Build the LaTeX
    # =====================
    # One extra "c" column holds the per-family mean.
    header = (
        "\\begin{table}[htbp!]\n"
        "\\footnotesize\n"
        "\\centering\n"
        f"\\caption{{Precision, Recall, and F1-score per family and embedding model for scenario {scenario}. Row- and column-wise means are included.}}\n"
        f"\\label{{tab:family_prec_rec_f1_{abr}}}\n"
        "\\resizebox{\\textwidth}{!}{%\n"
        "\\begin{tabular}{l l " + "c" * (len(embeddings) + 1) + "}\n"
        "\\toprule\n"
        "\\textbf{Family} & \\textbf{Metric} & "
        + " & ".join(embeddings) + " & \\textbf{Family Mean} \\\\\n"
        "\\midrule\n"
    )

    # Opening of the \multirow cell that labels the first row of each group.
    multirow_open = "\\multirow{" + str(len(METRICS)) + "}{*}"

    body_rows = []
    for fam in families:
        sub = df_all[df_all["Family"] == fam]
        for i, metric in enumerate(METRICS):
            # Only the first metric row of a family carries the family label.
            label = f"{multirow_open}{{{fam}}}" if i == 0 else ""
            values = []
            for emb in embeddings:
                val = sub[sub["Embedding"] == emb]
                if val.empty:
                    # No rows for this (family, embedding) pair.
                    values.append("--")
                else:
                    values.append(format_latex(val[metric].mean(), val[metric].std()))
            # Family mean: computed over the family's rows from all embeddings.
            values.append(format_latex(sub[metric].mean(), sub[metric].std()))
            body_rows.append(f"{label} & {metric} & " + " & ".join(values) + " \\\\\n")
        body_rows.append("\\midrule\n")

    # === Embedding Mean group: mean/std over all families, per embedding ===
    for i, metric in enumerate(METRICS):
        label = multirow_open + "{\\textbf{Embedding Mean}}" if i == 0 else ""
        values = [
            format_latex(frame[metric].mean(), frame[metric].std())
            for frame in emb_frames
        ]
        values.append("--")  # no "family mean" for this group of rows
        body_rows.append(f"{label} & {metric} & " + " & ".join(values) + " \\\\\n")
    body_rows.append("\\bottomrule\n")

    body = "".join(body_rows)

    footer = "\\end{tabular}\n}\n\\end{table}\n"

    latex_code = header + body + footer
    print("\n\n")
    print(latex_code)





\begin{table}[htbp!]
\footnotesize
\centering
\caption{Precision, Recall, and F1-score per family and embedding model for scenario (S1, I1) $\leftrightarrow$ (S2, I2). Row- and column-wise means are included.}
\label{tab:family_prec_rec_f1_si_si}
\resizebox{\textwidth}{!}{%
\begin{tabular}{l l ccccccc}
\toprule
\textbf{Family} & \textbf{Metric} & ALBERT & BART & BERT & DistilBERT & MiniLM & RoBERTa & \textbf{Family Mean} \\
\midrule
\multirow{3}{*}{Topology} & Precision & $0.63\pm0.06$ & $0.61\pm0.07$ & $0.63\pm0.05$ & $0.63\pm0.08$ & $0.62\pm0.07$ & $0.63\pm0.09$ & $0.62\pm0.07$ \\
 & Recall & $0.43\pm0.04$ & $0.43\pm0.03$ & $0.42\pm0.03$ & $0.42\pm0.04$ & $0.43\pm0.04$ & $0.43\pm0.04$ & $0.43\pm0.03$ \\
 & F1 & $0.51\pm0.03$ & $0.50\pm0.03$ & $0.50\pm0.03$ & $0.50\pm0.04$ & $0.50\pm0.03$ & $0.51\pm0.04$ & $0.50\pm0.03$ \\
\midrule
\multirow{3}{*}{Spectral} & Precision & $0.29\pm0.04$ & $0.29\pm0.04$ & $0.29\pm0.03$ & $0.30\pm0.04$ & $0.30\pm0.04$ & $0.29\pm0.05$ & $0.29\pm0.04$ \\