In [1]:
# Environment sanity check: report which interpreter and which library
# builds this kernel is actually using before any model code runs.
import sys
import torch
import speechbrain

for label, value in (
    ("Python:", sys.executable),
    ("torch:", torch.__version__),
    ("speechbrain version:", speechbrain.__version__),
    ("speechbrain file:", speechbrain.__file__),
):
    print(label, value)

# Importing the class is itself the check; a failure raises before the
# confirmation line below is printed.
from speechbrain.inference import EncoderClassifier
print("EncoderClassifier import OK")


  from .autonotebook import tqdm as notebook_tqdm


Python: /home/amit/yoav_zucker/Ecapatdnn-vox/.venv/bin/python
torch: 2.2.2+cpu
speechbrain version: 1.0.3
speechbrain file: /home/amit/yoav_zucker/Ecapatdnn-vox/.venv/lib/python3.10/site-packages/speechbrain/__init__.py
EncoderClassifier import OK


In [2]:
import os
from pathlib import Path

import torch
from speechbrain.inference import EncoderClassifier

# Project root. Defaults to the original workstation path; set the
# ECAPA_PROJECT_DIR environment variable to run the notebook from another
# checkout without editing this cell. An empty value falls back to the default.
PROJECT_DIR = Path(
    os.environ.get("ECAPA_PROJECT_DIR") or "/home/amit/yoav_zucker/Ecapatdnn-vox"
).resolve()
DATA_DIR = PROJECT_DIR / "data"
# parents=True so the cell also works when the project directory itself
# does not exist yet (plain mkdir would raise FileNotFoundError).
DATA_DIR.mkdir(parents=True, exist_ok=True)

print("DATA_DIR:", DATA_DIR)
print("GPU available:", torch.cuda.is_available())

# Use the GPU when one is visible to torch, otherwise run on CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pretrained ECAPA speaker model; its files are stored under
# DATA_DIR / "ecapa_pretrained". The device is passed via run_opts and the
# returned object is additionally moved with .to(DEVICE).
ecapa = EncoderClassifier.from_hparams(
    source="speechbrain/spkrec-ecapa-voxceleb",
    savedir=str(DATA_DIR / "ecapa_pretrained"),
    run_opts={"device": DEVICE},
).to(DEVICE)

print("ECAPA model loaded on:", DEVICE)


DATA_DIR: /home/amit/yoav_zucker/Ecapatdnn-vox/data
GPU available: False
ECAPA model loaded on: cpu


In [3]:
import pandas as pd

# Speaker metadata table, read from the project root.
meta_df = pd.read_csv(PROJECT_DIR / "vox1_meta.csv")

# Lookup from VoxCeleb1 speaker ID to a display name: the VGGFace1 ID
# with underscores replaced by spaces.
id_to_pretty_name = dict(
    zip(
        meta_df["VoxCeleb1 ID"],
        (vgg_name.replace("_", " ") for vgg_name in meta_df["VGGFace1 ID"]),
    )
)

# Collect every WAV file below the mini VoxCeleb1 directory, in sorted
# path order so later cells see the files in a stable sequence.
MINI_VOX_DIR = DATA_DIR.joinpath("mini_voxceleb1")
wav_files = sorted(MINI_VOX_DIR.rglob("*.wav"))

print("Total WAV files:", len(wav_files))
wav_files[:5]


Total WAV files: 900


[PosixPath('/home/amit/yoav_zucker/Ecapatdnn-vox/data/mini_voxceleb1/datasets--s3prl--mini_voxceleb1/snapshots/43c1579f7bc6c72f7e6ff55bf2bd14703f0bbbaa/test/id10012-0AXjxNXiEzo-00005.wav'),
 PosixPath('/home/amit/yoav_zucker/Ecapatdnn-vox/data/mini_voxceleb1/datasets--s3prl--mini_voxceleb1/snapshots/43c1579f7bc6c72f7e6ff55bf2bd14703f0bbbaa/test/id10012-0AXjxNXiEzo-00008.wav'),
 PosixPath('/home/amit/yoav_zucker/Ecapatdnn-vox/data/mini_voxceleb1/datasets--s3prl--mini_voxceleb1/snapshots/43c1579f7bc6c72f7e6ff55bf2bd14703f0bbbaa/test/id10012-GQxAiL_gSJg-00011.wav'),
 PosixPath('/home/amit/yoav_zucker/Ecapatdnn-vox/data/mini_voxceleb1/datasets--s3prl--mini_voxceleb1/snapshots/43c1579f7bc6c72f7e6ff55bf2bd14703f0bbbaa/test/id10012-GQxAiL_gSJg-00016.wav'),
 PosixPath('/home/amit/yoav_zucker/Ecapatdnn-vox/data/mini_voxceleb1/datasets--s3prl--mini_voxceleb1/snapshots/43c1579f7bc6c72f7e6ff55bf2bd14703f0bbbaa/test/id10012-GQxAiL_gSJg-00023.wav')]

In [7]:
import pandas as pd

# Column schema of the per-file result table. Passing it to the DataFrame
# constructor keeps these columns present even when `wav_files` is empty;
# without it, an empty `rows` list yields a frame with no columns and any
# later `res_df["correct"]` lookup raises KeyError.
RESULT_COLUMNS = [
    "file",
    "true_id",
    "true_name",
    "pred_id",
    "pred_name",
    "score",
    "correct",
]

rows = []

for p in wav_files:
    fname = p.name
    # The ground-truth speaker ID is the part of the file name before the
    # first "-" (e.g. "id10012-0AXjxNXiEzo-00005.wav" -> "id10012").
    true_id = fname.split("-")[0]
    true_name = id_to_pretty_name.get(true_id, "UNKNOWN")

    # classify_file returns four values; only the best score and the
    # predicted label are used below.
    scores, score, index, pred_label = ecapa.classify_file(str(p))

    # pred_label may be a list/tuple like ["id10001"]; unwrap the first
    # entry in that case, then normalise to a plain string.
    pred_id = str(
        pred_label[0] if isinstance(pred_label, (list, tuple)) else pred_label
    )
    # Predicted IDs that are missing from the metadata lookup are shown
    # as "UNKNOWN".
    pred_name = id_to_pretty_name.get(pred_id, "UNKNOWN")

    rows.append({
        "file": fname,
        "true_id": true_id,
        "true_name": true_name,
        "pred_id": pred_id,
        "pred_name": pred_name,
        "score": float(score),
        "correct": (pred_id == true_id),
    })

# Build the table once from the collected records (not row by row).
res_df = pd.DataFrame(rows, columns=RESULT_COLUMNS)
res_df.head(10)


Unnamed: 0,file,true_id,true_name,pred_id,pred_name,score,correct
0,id10012-0AXjxNXiEzo-00005.wav,id10012,Adam Driver,id10012,Adam Driver,0.770855,True
1,id10012-0AXjxNXiEzo-00008.wav,id10012,Adam Driver,id10012,Adam Driver,0.668523,True
2,id10012-GQxAiL_gSJg-00011.wav,id10012,Adam Driver,id10012,Adam Driver,0.786252,True
3,id10012-GQxAiL_gSJg-00016.wav,id10012,Adam Driver,id10012,Adam Driver,0.664431,True
4,id10012-GQxAiL_gSJg-00023.wav,id10012,Adam Driver,id10012,Adam Driver,0.734537,True
5,id10012-UJFb8jDbWl0-00003.wav,id10012,Adam Driver,id10012,Adam Driver,0.772641,True
6,id10012-UJFb8jDbWl0-00011.wav,id10012,Adam Driver,id10012,Adam Driver,0.789539,True
7,id10012-nCwwVjPNloY-00014.wav,id10012,Adam Driver,id10012,Adam Driver,0.713068,True
8,id10012-o-8xXV5MC8I-00003.wav,id10012,Adam Driver,id10012,Adam Driver,0.71417,True
9,id10012-tdeNGXCvfno-00008.wav,id10012,Adam Driver,id10012,Adam Driver,0.799193,True


In [14]:
# List every misclassified file: walk the result table and print the full
# record of each row whose prediction did not match the true speaker ID.
for row in res_df.itertuples():
    if row.correct:
        continue
    print(row)

Pandas(Index=98, file='id10313-iyNUxbcGGuk-00001.wav', true_id='id10313', true_name='Felicia Day', pred_id='id08335', pred_name='UNKNOWN', score=0.3597315847873688, correct=False)
Pandas(Index=720, file='id10509-NARTXzg1SIw-00001.wav', true_id='id10509', true_name='Joan Cusack', pred_id='id04199', pred_name='UNKNOWN', score=0.48815226554870605, correct=False)
Pandas(Index=721, file='id10509-NARTXzg1SIw-00002.wav', true_id='id10509', true_name='Joan Cusack', pred_id='id04199', pred_name='UNKNOWN', score=0.39287522435188293, correct=False)


In [8]:
# Overall accuracy: the mean of the boolean "correct" column is the
# fraction of files whose predicted speaker ID equals the true one.
acc = res_df.loc[:, "correct"].mean()
print(f"Accuracy on {len(res_df)} files: {acc*100:.2f}%")


Accuracy on 900 files: 99.67%
