In [None]:
import pandas as pd

# Load the dataset into a DataFrame. The path is relative, so the file is
# looked up in the current working directory of the running kernel/process.
df_text_clas = pd.read_csv('df_text_clas.csv')


In [None]:
# Columns holding the reference labels and, position by position, the columns
# holding the automatically produced labels they are compared against.
human_cols = [
    'vic_grupo_social',
    'amenaza_quien',
    'captura_metodo',
    'captura_tipo',
    'cautiverio_trato',
]
machine_cols = [
    'vic_grupo_social_classification',
    'amenaza_quien_classification',
    'captura_metodo_classification',
    'captura_tipo_classification',
    'cautiverio_trato_classification',
]

# Maps each reference column name to {'accuracy': ..., 'precision': ...}.
# Columns with no comparable rows get no entry at all.
validation_results = {}

for human_col, machine_col in zip(human_cols, machine_cols):
    # Missing values become '' so that "unlabelled" can be detected with a
    # plain string comparison once everything is cast to str.
    reference = df_text_clas[human_col].fillna('').astype(str)
    predicted = df_text_clas[machine_col].fillna('').astype(str)

    # Only rows labelled in BOTH columns take part in the comparison.
    labelled_in_both = (reference != '') & (predicted != '')
    reference = reference[labelled_in_both]
    predicted = predicted[labelled_in_both]

    n_rows = len(reference)
    if n_rows == 0:
        continue

    # Row-wise exact string agreement; reused for accuracy and per-class hits.
    agrees = reference == predicted
    accuracy = agrees.sum() / n_rows

    # Per-class precision: of the rows the machine assigned to a class, the
    # share whose reference label is that same class. A class the machine
    # never assigns scores 0.
    # Classes are drawn from the reference labels only, so a label that
    # appears solely in the machine column never enters the average.
    class_precisions = []
    for label in set(reference):
        assigned_by_machine = predicted == label
        n_assigned = assigned_by_machine.sum()
        n_hits = (assigned_by_machine & agrees).sum()
        if n_assigned > 0:
            class_precisions.append(n_hits / n_assigned)
        else:
            class_precisions.append(0)

    # Unweighted (macro) mean over the classes.
    if class_precisions:
        avg_precision = sum(class_precisions) / len(class_precisions)
    else:
        avg_precision = 0

    validation_results[human_col] = {
        'accuracy': accuracy,
        'precision': avg_precision,
    }

# One row per validated column, with 'accuracy' and 'precision' as columns.
validation_df = pd.DataFrame(validation_results).T
validation_df
