In [44]:
import os
import warnings

import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, matthews_corrcoef

In [45]:
# Locations of the two prediction CSVs compared in this notebook.
definition_path = '/home/wake/mnt/results_run_japanese/collapsed_generation_inputs.csv'
strict_path = '/home/wake/mnt/results_run_japanese/collapsed_strict_generation_inputs.csv'

# Read both files in order; each frame keeps the name later cells refer to.
def_df, str_df = (
    pd.read_csv(csv_path) for csv_path in (definition_path, strict_path)
)

In [46]:
# Show which columns the first predictions file provides.
print(def_df.columns)

Index(['video_id', 'title', 'description', 'transcript', 'context_text',
       'pred', 'label'],
      dtype='object')


In [47]:
# Column order of the single-row results table.
# Expands to: accuracy, f1_weighted,
#   {real,fake}_{precision,recall,f1},
#   cm_act_{real,fake}_pred_{real,fake}, MCC.
OUT_COLS = (
    ["accuracy", "f1_weighted"]
    + [
        f"{cls}_{metric}"
        for cls in ("real", "fake")
        for metric in ("precision", "recall", "f1")
    ]
    + [
        f"cm_act_{actual}_pred_{predicted}"
        for actual in ("real", "fake")
        for predicted in ("real", "fake")
    ]
    + ["MCC"]
)

In [48]:
# Class order passed as `labels=` to the per-class metrics and confusion matrix.
label = ["real", "fake"]

# Extract the 'label' and 'pred' columns of each frame as NumPy arrays.
def_y_true, def_y_pred = (def_df[col].to_numpy() for col in ("label", "pred"))
str_y_true, str_y_pred = (str_df[col].to_numpy() for col in ("label", "pred"))

In [49]:
def _evaluate_predictions(y_true, y_pred, labels):
    """Compute the evaluation metrics for one run.

    Parameters
    ----------
    y_true, y_pred : array-like
        Reference labels and predicted labels, aligned element-wise.
    labels : list
        Class order used for the per-class scores and for both axes of the
        confusion matrix.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, f1_weighted, confusion, mcc)``
        where ``precision``/``recall``/``f1`` are per-class arrays ordered
        like ``labels`` and ``confusion`` is indexed ``[actual, predicted]``.
    """
    # confusion_matrix(labels=...) only counts samples whose true AND predicted
    # values are both in `labels`, while accuracy and MCC use every sample.
    # Surface any stray value so the reported numbers are not silently
    # inconsistent with each other.
    stray = (set(y_true) | set(y_pred)) - set(labels)
    if stray:
        warnings.warn(
            f"Values outside {list(labels)} found: {sorted(map(str, stray))}; "
            "they are counted by accuracy/MCC but excluded from the confusion matrix.",
            stacklevel=2,
        )

    acc = accuracy_score(y_true, y_pred)
    prec, rec, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, labels=labels, zero_division=0
    )
    # Support-weighted F1; deliberately called without `labels=` as before.
    _, _, f1_weighted, _ = precision_recall_fscore_support(
        y_true, y_pred, average='weighted', zero_division=0
    )
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    mcc = matthews_corrcoef(y_true, y_pred)
    return acc, prec, rec, f1, f1_weighted, cm, mcc


# Same metric set for both runs; names are kept for the cells below.
(def_acc, def_prec, def_rec, def_f1,
 def_f1_weighted, def_cm, def_mcc) = _evaluate_predictions(def_y_true, def_y_pred, label)
(str_acc, str_prec, str_rec, str_f1,
 str_f1_weighted, str_cm, str_mcc) = _evaluate_predictions(str_y_true, str_y_pred, label)

In [None]:
def _metrics_row(acc, f1_weighted, prec, rec, f1, cm, mcc):
    """Flatten one run's metrics into a dict keyed by output column name.

    ``prec``, ``rec`` and ``f1`` are per-class sequences and ``cm`` is a 2x2
    confusion matrix indexed ``[actual, predicted]``. Position 0 is "real" and
    position 1 is "fake", matching the ``labels=label`` order used when the
    metrics were computed.
    """
    return {
        "accuracy": acc,
        "f1_weighted": f1_weighted,
        "real_precision": prec[0],
        "real_recall": rec[0],
        "real_f1": f1[0],
        "fake_precision": prec[1],
        "fake_recall": rec[1],
        "fake_f1": f1[1],
        "cm_act_real_pred_real": cm[0, 0],
        "cm_act_real_pred_fake": cm[0, 1],
        "cm_act_fake_pred_real": cm[1, 0],
        "cm_act_fake_pred_fake": cm[1, 1],
        "MCC": mcc,
    }


# One row per run, built by the same helper so the two cannot drift apart.
def_row = _metrics_row(def_acc, def_f1_weighted, def_prec, def_rec, def_f1, def_cm, def_mcc)
str_row = _metrics_row(str_acc, str_f1_weighted, str_prec, str_rec, str_f1, str_cm, str_mcc)

In [51]:
# Order each run's metrics by OUT_COLS and wrap them in a one-row frame.
def_values = [def_row[col] for col in OUT_COLS]
str_values = [str_row[col] for col in OUT_COLS]
def_out_df = pd.DataFrame([def_values], columns=OUT_COLS)
str_out_df = pd.DataFrame([str_values], columns=OUT_COLS)

# Destination directory; created on demand so the writes below cannot fail on a missing folder.
results_dir = "/home/wake/projects/proposal/local_llm/evaluation/raptor/raptor_repos/raptor_ja/results"
os.makedirs(results_dir, exist_ok=True)

def_output_path = f"{results_dir}/def_results.csv"
str_output_path = f"{results_dir}/str_results.csv"

# Write one CSV per run, without the index column.
for frame, target in ((def_out_df, def_output_path), (str_out_df, str_output_path)):
    frame.to_csv(target, index=False)

In [52]:
# Matthews correlation coefficient stored in the def_* results row.
print(def_out_df['MCC'])

0    0.226892
Name: MCC, dtype: float64


In [53]:
# Matthews correlation coefficient stored in the str_* results row.
print(str_out_df['MCC'])

0    0.364933
Name: MCC, dtype: float64
