In [25]:
# %%
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torchaudio
from tqdm import tqdm
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

In [26]:
# %%
# Project root is taken as the directory two levels above the current working
# directory, so this cell assumes the notebook is launched from a folder that
# sits two levels below the root.
PROJECT_ROOT = Path.cwd().parents[1]

# Make project-local packages importable. The membership check keeps the cell
# idempotent: re-running it no longer appends duplicate entries to sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("PROJECT_ROOT:", PROJECT_ROOT)
print("DEVICE:", DEVICE)


PROJECT_ROOT: /home/SpeakerRec/BioVoice
DEVICE: cuda


In [27]:
# %%
# Fetch the ReDimNet model through torch.hub using the "b5" / "ptn" / "vox2"
# configuration requested below.
redim_model = torch.hub.load(
    "IDRnD/ReDimNet",
    "ReDimNet",
    model_name="b5",
    train_type="ptn",
    dataset="vox2",
)

# Move the weights to the selected device and switch to inference mode.
redim_model = redim_model.to(DEVICE)
redim_model = redim_model.eval()

# Freeze every parameter: the model is only used to extract embeddings here.
redim_model.requires_grad_(False)

print("Loaded ReDimNet (frozen).")


Using cache found in /home/SpeakerRec/.cache/torch/hub/IDRnD_ReDimNet_master


Loaded ReDimNet (frozen).


In [28]:
# %%
# Directory that is scanned for the *.wav recordings to embed.
WAV_DIR = PROJECT_ROOT.joinpath("data", "wavs")

# Speaker labels recognised in file names (matched as "<speaker>_...").
SPEAKERS = ["eden", "idan", "yoav"]


def speaker_from_name(p: Path):
    """Infer the speaker label from a file name.

    The file stem is lower-cased and compared against every entry of
    ``SPEAKERS``; a label matches when the stem starts with the label
    followed by an underscore.

    Args:
        p: Path of the audio file; only its stem is inspected.

    Returns:
        The first matching label from ``SPEAKERS``, or ``None`` when no
        label matches.
    """
    stem = p.stem.lower()
    return next(
        (label for label in SPEAKERS if stem.startswith(f"{label}_")),
        None,
    )


In [29]:
# %%
# Sample rate that every recording is brought to before it is embedded.
TARGET_SR = 16000


@torch.no_grad()
def embed_with_redim(wav_path: Path):
    """Compute an L2-normalised ReDimNet embedding for one audio file.

    The file is loaded with torchaudio, resampled to ``TARGET_SR`` when its
    native rate differs, reduced to its first channel, and passed through
    ``redim_model`` with gradient tracking disabled.

    Args:
        wav_path: Path of the audio file to embed.

    Returns:
        A CPU tensor holding the unit-norm embedding (a 192-dimensional
        vector according to the original note — TODO confirm for other
        model variants).
    """
    waveform, sample_rate = torchaudio.load(wav_path)

    if sample_rate != TARGET_SR:
        waveform = torchaudio.functional.resample(waveform, sample_rate, TARGET_SR)

    # Keep only the first channel (no down-mix) and move it to the model's device.
    first_channel = waveform[:1].to(DEVICE)

    # Drop the leading batch axis, then scale the vector to unit L2 norm.
    embedding = F.normalize(redim_model(first_channel).squeeze(0), dim=0)
    return embedding.cpu()


In [30]:
# %%
# Embed every recording in WAV_DIR whose name maps to a known speaker and
# collect one record per file.
wav_paths = sorted(WAV_DIR.glob("*.wav"))
rows = []

for wav_path in tqdm(wav_paths):
    speaker = speaker_from_name(wav_path)
    if speaker is not None:
        # Files without a recognised speaker prefix are skipped entirely.
        rows.append(
            dict(
                path=str(wav_path),
                file=wav_path.name,
                speaker=speaker,
                embedding=embed_with_redim(wav_path),
            )
        )

df = pd.DataFrame(rows)
print("Total samples:", len(df))
df.head()


100%|██████████| 90/90 [00:05<00:00, 17.04it/s]


Total samples: 90


Unnamed: 0,path,file,speaker,embedding
0,/home/SpeakerRec/BioVoice/data/wavs/eden_001.wav,eden_001.wav,eden,"[tensor(-0.0875), tensor(0.0159), tensor(0.008..."
1,/home/SpeakerRec/BioVoice/data/wavs/eden_002.wav,eden_002.wav,eden,"[tensor(-0.1283), tensor(0.0003), tensor(0.005..."
2,/home/SpeakerRec/BioVoice/data/wavs/eden_003.wav,eden_003.wav,eden,"[tensor(-0.1131), tensor(-0.0485), tensor(0.03..."
3,/home/SpeakerRec/BioVoice/data/wavs/eden_004.wav,eden_004.wav,eden,"[tensor(-0.0768), tensor(-0.0396), tensor(0.03..."
4,/home/SpeakerRec/BioVoice/data/wavs/eden_005.wav,eden_005.wav,eden,"[tensor(-0.1357), tensor(0.0630), tensor(0.099..."


In [31]:
# %%
# One centroid per speaker: the mean of that speaker's embeddings, rescaled
# to unit L2 norm.
centroids = {}

for spk in SPEAKERS:
    is_speaker = df["speaker"] == spk
    speaker_embs = torch.stack(list(df.loc[is_speaker, "embedding"]))
    centroids[spk] = F.normalize(speaker_embs.mean(dim=0), dim=0)

print("Computed centroids for:", list(centroids))


Computed centroids for: ['eden', 'idan', 'yoav']


In [32]:
# %%
def cosine_to_centroid(emb, centroid):
    """Return the cosine similarity between an embedding and a centroid.

    The previous implementation returned a plain dot product, which equals
    the cosine only when both vectors already have unit L2 norm. Computing
    the cosine explicitly keeps the result correct (and within [-1, 1]) for
    any non-zero inputs, while giving the same value, up to floating-point
    rounding, for unit-norm vectors.

    Args:
        emb: 1-D tensor holding a single embedding.
        centroid: 1-D tensor of the same length as ``emb``.

    Returns:
        The cosine similarity as a Python ``float``.
    """
    return float(F.cosine_similarity(emb, centroid, dim=0))

# Score each sample against the centroid of the speaker it is labelled with.
df["cosine_to_centroid"] = [
    cosine_to_centroid(sample_emb, centroids[sample_spk])
    for sample_emb, sample_spk in zip(df["embedding"], df["speaker"])
]


In [33]:
# %%
# Rank samples per speaker (speakers in alphabetical order), most similar to
# the speaker's centroid first, with a fresh 0..n-1 index.
df_sorted = df.sort_values(
    by=["speaker", "cosine_to_centroid"],
    ascending=[True, False],
    ignore_index=True,
)

df_sorted.head(10)


Unnamed: 0,path,file,speaker,embedding,cosine_to_centroid
0,/home/SpeakerRec/BioVoice/data/wavs/eden_020.wav,eden_020.wav,eden,"[tensor(-0.1540), tensor(0.0456), tensor(0.026...",0.867826
1,/home/SpeakerRec/BioVoice/data/wavs/eden_014.wav,eden_014.wav,eden,"[tensor(-0.1386), tensor(0.0100), tensor(0.047...",0.859773
2,/home/SpeakerRec/BioVoice/data/wavs/eden_030.wav,eden_030.wav,eden,"[tensor(-0.0657), tensor(-0.0811), tensor(0.06...",0.853604
3,/home/SpeakerRec/BioVoice/data/wavs/eden_022.wav,eden_022.wav,eden,"[tensor(-0.1560), tensor(-0.0205), tensor(0.09...",0.852639
4,/home/SpeakerRec/BioVoice/data/wavs/eden_027.wav,eden_027.wav,eden,"[tensor(-0.2169), tensor(0.0054), tensor(0.100...",0.85119
5,/home/SpeakerRec/BioVoice/data/wavs/eden_011.wav,eden_011.wav,eden,"[tensor(-0.1105), tensor(-0.0325), tensor(0.17...",0.850186
6,/home/SpeakerRec/BioVoice/data/wavs/eden_015.wav,eden_015.wav,eden,"[tensor(-0.1485), tensor(-0.0228), tensor(0.06...",0.83857
7,/home/SpeakerRec/BioVoice/data/wavs/eden_013.wav,eden_013.wav,eden,"[tensor(-0.0536), tensor(-0.0460), tensor(0.05...",0.830886
8,/home/SpeakerRec/BioVoice/data/wavs/eden_029.wav,eden_029.wav,eden,"[tensor(-0.1545), tensor(-0.0141), tensor(0.06...",0.83071
9,/home/SpeakerRec/BioVoice/data/wavs/eden_016.wav,eden_016.wav,eden,"[tensor(-0.0846), tensor(0.0181), tensor(0.079...",0.826087


In [34]:
# %%
# Number of best-ranked samples to show for each speaker.
TOP_K = 5

for spk in SPEAKERS:
    print(f"\n===== TOP {TOP_K} MOST '{spk.upper()}' SAMPLES =====")
    # df_sorted is already ordered by descending similarity within a speaker,
    # so the first TOP_K rows of the speaker's slice are its best matches.
    speaker_ranking = df_sorted.loc[
        df_sorted["speaker"] == spk, ["file", "cosine_to_centroid"]
    ]
    display(speaker_ranking.head(TOP_K))



===== TOP 5 MOST 'EDEN' SAMPLES =====


Unnamed: 0,file,cosine_to_centroid
0,eden_020.wav,0.867826
1,eden_014.wav,0.859773
2,eden_030.wav,0.853604
3,eden_022.wav,0.852639
4,eden_027.wav,0.85119



===== TOP 5 MOST 'IDAN' SAMPLES =====


Unnamed: 0,file,cosine_to_centroid
30,idan_009.wav,0.911093
31,idan_016.wav,0.910186
32,idan_012.wav,0.907344
33,idan_022.wav,0.906809
34,idan_019.wav,0.900762



===== TOP 5 MOST 'YOAV' SAMPLES =====


Unnamed: 0,file,cosine_to_centroid
60,yoav_024.wav,0.897707
61,yoav_007.wav,0.891558
62,yoav_022.wav,0.887954
63,yoav_021.wav,0.878858
64,yoav_014.wav,0.873716


In [35]:
# %%
# Per-speaker summary of how tightly the samples cluster around the centroid.
stats = []

for spk in SPEAKERS:
    speaker_mask = df_sorted["speaker"] == spk
    sims = df_sorted.loc[speaker_mask, "cosine_to_centroid"].to_numpy()
    lowest, highest = sims.min(), sims.max()
    stats.append({
        "speaker": spk,
        "min": lowest,
        "max": highest,
        "mean": sims.mean(),
        "std": sims.std(),  # NumPy default, i.e. population std (ddof=0)
        "range": highest - lowest,
    })

stats_df = pd.DataFrame(stats)
display(stats_df)


Unnamed: 0,speaker,min,max,mean,std,range
0,eden,0.717641,0.867826,0.80532,0.038546,0.150185
1,idan,0.793686,0.911093,0.87363,0.025822,0.117407
2,yoav,0.68003,0.897707,0.81755,0.055169,0.217677


In [36]:
# %%
# Write the ranking to a CSV in the current working directory. The embedding
# column is dropped before export.
out_path = "speaker_similarity_ranking.csv"
df_sorted_out = df_sorted.drop("embedding", axis=1)
df_sorted_out.to_csv(out_path, index=False)

print("Saved ranking to:", out_path)


Saved ranking to: speaker_similarity_ranking.csv
