## Load Libraries

In [16]:
import numpy as np
import pandas as pd
import soundfile as sf
from tqdm import tqdm
from IPython.display import Audio
from pyannote.core import Annotation, Segment
from pyannote.metrics.diarization import DiarizationErrorRate
import re


## Read model files

In [13]:
# Load every system's diarization results, in the same order as the names on the left
assemblyai, deepgram, nemo, pyannote, soniox, reverb = map(pd.read_csv, (
    'Diarization results/assemblyai_diarization_der_0.1272_30.csv',
    'Diarization results/deepgram_diarization_der_0.1421_30.csv',
    'Diarization results/nemo_diarization.csv',
    'Diarization results/pyannote_diarization_der_0.2130_30.csv',
    'Diarization results/soniox_diarization_der_0.2005_30.csv',
    'Diarization results/reverb_diarization_der_0.2687_30.csv',
))

# reverb only: keep the first 31 rows, then drop any of them whose 'audio_id' is NaN
reverb = reverb.iloc[:31].loc[lambda frame: frame['audio_id'].notna()]


## Check that audio_id is the same across all files

In [14]:
# Verify that 'audio_id' columns are the same across all dataframes, if successful print a message
def verify_audio_ids(*dfs):
    """Check that every dataframe holds the same set of ``audio_id`` values.

    IDs are compared as sets, so row order and duplicate IDs are ignored.
    The first dataframe is the reference that all others are compared to.

    Args:
        *dfs: Objects indexable by ``'audio_id'`` (e.g. pandas DataFrames).

    Raises:
        ValueError: If any dataframe's ID set differs from the first one. The
            message lists, per mismatching dataframe (by 0-based position in
            the call), the IDs it is missing and the IDs it has in excess.
    """
    audio_ids = [set(df['audio_id']) for df in dfs]
    # With no dataframes there is nothing to compare; fall through to the success message.
    reference = audio_ids[0] if audio_ids else set()

    mismatches = []
    for position, ids in enumerate(audio_ids[1:], start=1):
        if ids != reference:
            # key=str keeps sorting safe when IDs have mixed types (e.g. str and NaN).
            missing = sorted(reference - ids, key=str)
            unexpected = sorted(ids - reference, key=str)
            mismatches.append(
                f"dataframe #{position} vs #0: missing {missing}, unexpected {unexpected}"
            )

    if mismatches:
        raise ValueError(
            "Audio IDs do not match across all dataframes. " + "; ".join(mismatches)
        )
    print("All audio IDs match across the dataframes.")
# Stop here with a ValueError if the six result sets do not share the same audio_id values.
verify_audio_ids(assemblyai, deepgram, nemo, pyannote, soniox, reverb)

All audio IDs match across the dataframes.


## DER Metrics

In [17]:
def create_pyannote_annotation(segments_list):
    """Convert an iterable of (start, end, speaker_tag) triples into an ``Annotation``.

    Each triple becomes one ``Segment(start, end)`` entry labelled with its speaker tag.
    """
    result = Annotation()
    for seg_start, seg_end, label in segments_list:
        result[Segment(seg_start, seg_end)] = label
    return result

# Notebook-level DER metric instance
der_metric = DiarizationErrorRate()

## Calculate absolute DER for all files

In [None]:
def _parse_segments(cell):
    """Return the segment list held in a dataframe cell (string cells are evaluated)."""
    if isinstance(cell, str):
        # SECURITY NOTE(review): eval() executes whatever expression the cell contains.
        # Only run this on result files you produced yourself. If the cells are plain
        # literals, ast.literal_eval is the safe replacement -- confirm the stored
        # format first (e.g. numpy-style reprs would not parse as literals).
        return eval(cell)
    return cell


def compute_der_for_dataset(df, ref_col='ref_segments', pred_col='pred_segments'):
    """Compute per-file DER and the dataset-level DER for one system's results.

    Args:
        df: DataFrame with an 'audio_id' column plus the reference and prediction
            segment columns. Each segment cell is either a list of
            (start, end, speaker_tag) triples or a string that evaluates to one.
        ref_col: Name of the column holding reference segments.
        pred_col: Name of the column holding predicted segments.

    Returns:
        A tuple ``(per_file, abs_der)`` where ``per_file`` is a DataFrame with columns
        'audio_id' and 'DER' (one row per scored file) and ``abs_der`` is ``abs()`` of
        the metric after all files have been fed to it.

    Rows whose reference or prediction cell is neither a string nor a list (e.g. NaN)
    are not scored; they are reported after the loop so the omission is visible.
    """
    results = []
    skipped_ids = []
    # Fresh metric per call so the aggregate only reflects this dataframe.
    der_metric = DiarizationErrorRate()
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing"):
        ref_cell, pred_cell = row[ref_col], row[pred_col]
        # Skip rows where either cell is not a string or list (e.g., NaN or float)
        if not (isinstance(ref_cell, (str, list)) and isinstance(pred_cell, (str, list))):
            skipped_ids.append(row.get('audio_id'))
            continue
        ref_annotation = create_pyannote_annotation(_parse_segments(ref_cell))
        pred_annotation = create_pyannote_annotation(_parse_segments(pred_cell))
        der = der_metric(ref_annotation, pred_annotation)
        results.append({'audio_id': row['audio_id'], 'DER': der})

    if skipped_ids:
        # Skipped files are absent from both the per-file table and the aggregate.
        print(f"Skipped {len(skipped_ids)} row(s) without usable segments: {skipped_ids}")

    # abs() on the metric yields the value accumulated over every call above
    # (see pyannote.metrics documentation).
    abs_der = abs(der_metric)
    print(f"Absolute DER for dataset: {100 * abs_der:.2f}%")
    # Explicit columns keep the frame's schema stable even when nothing was scored.
    return pd.DataFrame(results, columns=['audio_id', 'DER']), abs_der

# Per-system outputs, keyed by system name:
#   der_results -> per-file DER table, abs_ders -> dataset-level DER
der_results = {}
abs_ders = {}

# Systems to score, in reporting order
datasets = dict(
    assemblyai=assemblyai,
    deepgram=deepgram,
    nemo=nemo,
    pyannote=pyannote,
    soniox=soniox,
    reverb=reverb,
)

for name, df in datasets.items():
    print(f"\nProcessing {name}...")
    der_df, abs_der = compute_der_for_dataset(df)
    der_results[name], abs_ders[name] = der_df, abs_der


Processing assemblyai...


Processing: 100%|██████████| 30/30 [00:00<00:00, 130.00it/s]


Absolute DER for dataset: 12.72%

Processing deepgram...


Processing: 100%|██████████| 30/30 [00:00<00:00, 132.03it/s]


Absolute DER for dataset: 14.21%

Processing nemo...


Processing: 100%|██████████| 30/30 [00:00<00:00, 38.18it/s]


Absolute DER for dataset: 26.82%

Processing pyannote...


Processing: 100%|██████████| 30/30 [00:00<00:00, 146.01it/s]


Absolute DER for dataset: 21.30%

Processing soniox...


Processing: 100%|██████████| 30/30 [00:00<00:00, 179.59it/s]


Absolute DER for dataset: 20.05%

Processing reverb...


Processing: 100%|██████████| 30/30 [00:00<00:00, 147.12it/s]

Absolute DER for dataset: 20.23%



