In [2]:
import json
import pandas as pd
import numpy as np

from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    confusion_matrix,
    multilabel_confusion_matrix
)
from sklearn.preprocessing import MultiLabelBinarizer


# ================================================================
# Funciones auxiliares
# ================================================================

def safe_parse_json(json_str):
    """Parsea JSON de forma segura."""
    if pd.isna(json_str):
        return None
    if isinstance(json_str, dict):
        return json_str
    if not isinstance(json_str, str):
        return None
    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        return None


def extract_ids(json_obj):
    """
    Extrae IDs predichos desde JSON_RPTA.
    Si no encuentra ningún 'id', retorna set vacío y main_id=None.
    """
    if not isinstance(json_obj, dict):
        return set(), None

    labels = json_obj.get("labels", [])
    if not isinstance(labels, list):
        return set(), None

    ids = set()
    main_id = None
    max_conf = -1

    for item in labels:
        if not isinstance(item, dict):
            continue

        pred_id = item.get("id")
        if pred_id is None:
            continue  # No se considera la fila si no tiene ids

        try:
            pred_id = int(pred_id)
        except:
            continue

        ids.add(pred_id)

        conf = item.get("confianza", 0)
        try:
            conf = float(conf)
        except:
            conf = 0

        if conf > max_conf:
            max_conf = conf
            main_id = pred_id

    return ids, main_id


# ================================================================
# LECTURA DEL CSV
# ================================================================
df = pd.read_csv("resultados1.csv", encoding="utf-8", sep=";")

# Quitar filas con ID real == 0
df = df[df["ID"] != 0].copy()

# ================================================================
# EVALUACIÓN
# ================================================================
y_true_multi = []
y_pred_multi = []

y_true_mc = []
y_pred_mc = []

filas_descartadas_json = 0

for idx, row in df.iterrows():

    true_id = int(row["ID"])
    json_obj = safe_parse_json(row.get("JSON_RPTA"))

    if json_obj is None:
        filas_descartadas_json += 1
        continue

    pred_ids, main_id = extract_ids(json_obj)

    # Regla nueva: si NO trae ids → no usar para comparar
    if len(pred_ids) == 0:
        filas_descartadas_json += 1
        continue

    # MULTILABEL
    y_true_multi.append({true_id})
    y_pred_multi.append(pred_ids)

    # MULTICLASS
    y_true_mc.append(true_id)
    y_pred_mc.append(main_id if main_id is not None else 0)

# Si no quedan filas válidas, abortar
n_validas = len(y_true_multi)
if n_validas == 0:
    print("No hay filas válidas para evaluación.")
    exit()

# ================================================================
# MÉTRICAS MULTILABEL
# ================================================================
ids_reales = sorted({list(x)[0] for x in y_true_multi})  # ground truth único por fila
mlb = MultiLabelBinarizer(classes=ids_reales)

Y_true_ml = mlb.fit_transform(y_true_multi)
Y_pred_ml = mlb.transform(y_pred_multi)

accuracy_ml = accuracy_score(Y_true_ml, Y_pred_ml)

prec_micro, rec_micro, f1_micro, _ = precision_recall_fscore_support(
    Y_true_ml, Y_pred_ml, average="micro", zero_division=0
)
prec_macro, rec_macro, f1_macro, _ = precision_recall_fscore_support(
    Y_true_ml, Y_pred_ml, average="macro", zero_division=0
)
prec_weighted, rec_weighted, f1_weighted, _ = precision_recall_fscore_support(
    Y_true_ml, Y_pred_ml, average="weighted", zero_division=0
)

prec_cat, rec_cat, f1_cat, support_cat = precision_recall_fscore_support(
    Y_true_ml, Y_pred_ml, average=None, zero_division=0
)

ml_conf_matrices = multilabel_confusion_matrix(Y_true_ml, Y_pred_ml)


# ================================================================
# MÉTRICAS MULTICLASS
# ================================================================
acc_mc = accuracy_score(y_true_mc, y_pred_mc)

ids_presentes = sorted(set(y_true_mc) | set(y_pred_mc))
cm = confusion_matrix(y_true_mc, y_pred_mc, labels=ids_presentes)
cm_norm = np.nan_to_num(cm.astype(float) / cm.sum(axis=1, keepdims=True))


# ================================================================
# GENERAR REPORTE
# ================================================================
lines = []

lines.append("REPORTE DE EVALUACIÓN DEL CLASIFICADOR\n")
lines.append("======================================\n\n")
lines.append(f"Filas originales con ID != 0: {len(df)}\n")
lines.append(f"Filas **usadas** realmente en métricas: {n_validas}\n")
lines.append(f"Filas descartadas (JSON sin id o no válido): {filas_descartadas_json}\n\n")

lines.append("1. Métricas Globales (Multilabel)\n")
lines.append("---------------------------------\n")
lines.append(f"Accuracy multilabel: {accuracy_ml:.4f}\n\n")
lines.append(f"Micro Precision: {prec_micro:.4f}\n")
lines.append(f"Micro Recall: {rec_micro:.4f}\n")
lines.append(f"Micro F1: {f1_micro:.4f}\n\n")
lines.append(f"Macro Precision: {prec_macro:.4f}\n")
lines.append(f"Macro Recall: {rec_macro:.4f}\n")
lines.append(f"Macro F1: {f1_macro:.4f}\n\n")
lines.append(f"Weighted Precision: {prec_weighted:.4f}\n")
lines.append(f"Weighted Recall: {rec_weighted:.4f}\n")
lines.append(f"Weighted F1: {f1_weighted:.4f}\n\n")

lines.append("2. Métricas por Categoría (One-vs-Rest)\n")
lines.append("--------------------------------------\n")
lines.append("ID\tPrecision\tRecall\tF1\tSupport\n")
for i, cid in enumerate(mlb.classes_):
    lines.append(
        f"{cid}\t{prec_cat[i]:.4f}\t{rec_cat[i]:.4f}\t{f1_cat[i]:.4f}\t{support_cat[i]}\n"
    )

lines.append("\n3. Matriz de Confusión Multilabel (One-vs-Rest)\n")
for i, cid in enumerate(mlb.classes_):
    tn, fp, fn, tp = ml_conf_matrices[i].ravel()
    lines.append(f"\nID {cid}: [[TN={tn}, FP={fp}], [FN={fn}, TP={tp}]]\n")

lines.append("\n4. Métricas Globales (Multiclass)\n")
lines.append("--------------------------------\n")
lines.append(f"Accuracy multiclass: {acc_mc:.4f}\n")
lines.append("Nota: predicción 0 = sin predicción.\n\n")

lines.append("5. Matriz de Confusión Multiclase (Valores absolutos)\n")
header = "ID_real\\ID_pred\t" + "\t".join(str(c) for c in ids_presentes) + "\n"
lines.append(header)
for i, real_id in enumerate(ids_presentes):
    row_vals = "\t".join(str(v) for v in cm[i])
    lines.append(f"{real_id}\t\t{row_vals}\n")

lines.append("\n6. Matriz de Confusión Multiclase Normalizada (%)\n")
lines.append(header)
for i, real_id in enumerate(ids_presentes):
    row_vals = "\t".join(f"{v*100:.2f}" for v in cm_norm[i])
    lines.append(f"{real_id}\t\t{row_vals}\n")

# Guardar archivo
with open("reporte_evaluacion.txt", "w", encoding="utf-8") as f:
    for line in lines:
        f.write(line if line.endswith("\n") else line + "\n")

# Resumen en consola
print("===== RESUMEN ===telek")
print(f"Filas evaluadas realmente: {n_validas}")
print(f"Accuracy multilabel: {accuracy_ml:.4f}")
print(f"Accuracy multiclass: {acc_mc:.4f}")
print("Reporte generado en 'reporte_evaluacion.txt'")

===== RESUMEN ===telek
Filas evaluadas realmente: 593
Accuracy multilabel: 0.4671
Accuracy multiclass: 0.6610
Reporte generado en 'reporte_evaluacion.txt'


  cm_norm = np.nan_to_num(cm.astype(float) / cm.sum(axis=1, keepdims=True))
