# Human vs AI Experiment
This notebook supports a comparative experiment between humans and the trained model (Decision Tree).

## What This Notebook Does:
- Loads prediction results from the Decision Tree model.
- Identifies a subset of correct and incorrect predictions for **both classes**.
- Prepares 20 audio samples for human testing (True Positive, False Negative, True Negative, False Positive).

In [5]:
# Importing required libraries
import pandas as pd
import os
import shutil

In [6]:
# Load the Decision Tree predictions on in-the-wild dataset
df = pd.read_csv("decision_tree_predictions.csv")

# Preview the data
df.head()

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,filename,label,LABEL,y_true,y_pred
0,0.321908,0.02908,892.288945,1233.848881,1905.975877,0.043149,-442.5108,138.27226,34.29983,25.730999,...,4.145848,-9.528758,-5.35257,2.855662,-6.844147,0.wav,fake,0,0,1
1,0.382224,0.021167,1841.4519,1742.591081,3266.863293,0.18386,-386.52838,131.19376,34.97732,-4.596958,...,22.538418,-4.61357,-14.307812,8.966136,-9.130708,1.wav,fake,0,0,1
2,0.442343,0.026622,1338.638364,1325.166356,2321.394231,0.10785,-429.7679,99.46211,35.457706,18.991756,...,0.191032,-12.39661,-1.86977,-2.484421,-2.308154,10.wav,fake,0,0,1
3,0.351164,0.046481,1150.950188,854.559094,2000.127378,0.097813,-304.12946,155.3363,-79.21963,13.790988,...,-9.226794,1.154727,-6.738657,-1.23853,1.064191,10000.wav,fake,0,0,1
4,0.350056,0.074956,1873.614317,1581.145914,3353.639539,0.14188,-265.87704,64.10755,-6.883793,18.329998,...,-3.327877,-3.487081,-2.881284,-3.818068,-1.765636,10002.wav,fake,0,0,1


In [2]:
# TP: Fake correctly predicted as fake
tp = df[(df["y_true"] == 0) & (df["y_pred"] == 0)]

# FN: Fake misclassified as real
fn = df[(df["y_true"] == 0) & (df["y_pred"] == 1)]

# TN: Real correctly predicted as real
tn = df[(df["y_true"] == 1) & (df["y_pred"] == 1)]

# FP: Real misclassified as fake
fp = df[(df["y_true"] == 1) & (df["y_pred"] == 0)]

# Show counts
print(f"TP: {len(tp)}, FN: {len(fn)}, TN: {len(tn)}, FP: {len(fp)}")

TP: 5138, FN: 6678, TN: 15784, FP: 4179


In [4]:
# Folder where fake/real audio files are stored
fake_folder = "data/processed_data/fake"
real_folder = "data/processed_data/real"

# Create output folder for sampled audios
os.makedirs("human_vs_ai_samples", exist_ok=True)

# Sample 5 audios from each category
sampled_tp = tp.sample(n=5, random_state=42)
sampled_fn = fn.sample(n=5, random_state=42)
sampled_tn = tn.sample(n=5, random_state=42)
sampled_fp = fp.sample(n=5, random_state=42)

# Combine and shuffle
combined = pd.concat([sampled_tp, sampled_fn, sampled_tn, sampled_fp]).sample(frac=1, random_state=42)

# Save audio files and generate reference table
meta = []

for i, row in combined.iterrows():
    filename = row["filename"]
    label = row["y_true"]
    pred = row["y_pred"]
    src = os.path.join(fake_folder if label == 0 else real_folder, filename)
    dst = os.path.join("human_vs_ai_samples", filename)
    
    try:
        shutil.copy(src, dst)
        meta.append({"filename": filename, "true_label": label, "model_prediction": pred})
    except FileNotFoundError:
        print(f"Missing file: {filename}")

# Save metadata
pd.DataFrame(meta).to_csv("human_vs_ai_samples/sample_metadata.csv", index=False)
print("Samples prepared.")

Samples prepared.
