In [None]:
import argparse
import logging
import matplotlib
import copy
import os
from matplotlib import pyplot as plot
from s4d.utils import *
from s4d.diar import Diar
from s4d import viterbi, segmentation
from s4d.clustering import hac_bic
from sidekit.sidekit_io import init_logging
from s4d.gui.dendrogram import plot_dendrogram
from s4d.diar import Diar
from s4d.utils import *
from s4d import scoring
from s4d.model_iv import ModelIV
from sidekit.sidekit_io import *
from sidekit.bosaris import IdMap, Scores
from s4d.clustering.hac_iv import hac_iv
from s4d import scoring
import numpy as np
import traceback

In [None]:
# Configure logging through sidekit's init_logging at DEBUG level.
loglevel = logging.DEBUG
init_logging(level=loglevel)

## Setting important directories

In [None]:
# Recording to process; used to build every per-show path below.
show = 'ES2003a.Mix-Headset'
# Root directory of this experiment's outputs.
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept so that any cell still referring to it keeps working.
dir = 'out-mfcc-with-vad-vit'
#dir = 'out-mfcc-with-vad'
#dir = 'out'

# Per-show working directory and one sub-directory per kind of output.
wdir = os.path.join(dir, show)
sdir = os.path.join(wdir, 'segments')
pdir = os.path.join(wdir, 'plda')
final_segments_dir = os.path.join(wdir, 'results')
linear_bic_dir = os.path.join(wdir, 'linear.bic')
bic_hac_dir = os.path.join(wdir, 'bic.hac')
hac_iv_dir = os.path.join(wdir, 'hac.iv')
results_vit_dir = os.path.join(wdir, 'results.vit')

# Create the output tree. exist_ok=True keeps the cell safe to re-run and
# removes the check-then-create race of os.path.exists() + os.makedirs().
for _output_dir in (wdir, sdir, pdir, final_segments_dir, linear_bic_dir,
                    bic_hac_dir, hac_iv_dir, results_vit_dir):
    os.makedirs(_output_dir, exist_ok=True)

# Input audio and the names of the sub-directories / files derived from `show`.
input_show = os.path.join('audio', show + '.wav')
segments_dir = 'segments'
plda_dir = 'plda'
data_dir = 'data'
mfcc_dir = 'mfcc'
idmap_fn = show + '.idmap.h5'
model_fn = 'data/model/ester_model_1024_300_150.h5'
# Template with one '{:.2f}' placeholder.
hac_diar_fn = os.path.join(wdir, segments_dir, show + '.hac.{:.2f}.seg')
score_fn = os.path.join(wdir, plda_dir, show + '.score.plda.h5')
mfcc_fn = os.path.join(wdir, mfcc_dir, show + '.mfcc.h5')

# Segmentation file paths. Names containing '{:.2f}' are templates that are
# filled in with the swept threshold values via str.format().
init_seg = os.path.join(wdir, segments_dir, 'init.seg')
gd_seg = os.path.join(wdir, segments_dir, 'gd_seg_250.seg')
linear_bic_seg = os.path.join(linear_bic_dir, 'li_bic.{:.2f}.seg')
mfcc_m_speaker = os.path.join(wdir, mfcc_dir, show + '.{:.2f}.{:.2f}.test.mfcc.h5')
bic_hac_seg = os.path.join(bic_hac_dir, 'bic_hac.{:.2f}.{:.2f}.seg')
# NOTE(review): this template lives in final_segments_dir ('results'), not in
# hac_iv_dir, which is created above but not referenced by any template here.
hac_iv_seg = os.path.join(final_segments_dir, 'bic_hac.{:.2f}.{:.2f}.{:.2f}.seg')
# NOTE(review): the '-250' in the file name is a literal, it does not follow
# the Viterbi penalty variable if that value is changed.
results_vit_seg = os.path.join(results_vit_dir, 'bic_hac.{:.2f}.{:.2f}.{:.2f}.vit.-250.seg')


## Loading VAD and cepstral features with delta and double delta. Do not run if precomputed!

In [None]:
# Build a 'basic' feature extractor for the input audio and save the features
# of `show` to mfcc_fn. This cell recomputes the features on every run; per the
# section heading, skip it when mfcc_fn has already been produced.
fe = get_feature_extractor(input_show, type_feature_extractor='basic')
fe.save(show, output_feature_filename=mfcc_fn)


In [None]:
# Load the features of `show` from mfcc_fn through a 'sid' feature server.
fs = get_feature_server(mfcc_fn, feature_server_type='sid')
# The second value returned by load() is used below as a mask over `cep`
# (presumably the per-frame VAD labels mentioned in the section heading --
# TODO confirm). It gets a real name instead of `_`, which is both the
# conventional throwaway name and IPython's "last output" variable.
cep, vad_label = fs.load(show)
# Zero out every entry of `cep` that the mask does not select.
cep[np.logical_not(vad_label)] = 0

In [None]:
# Parameters
# Window size passed to segmentation.segmentation().
win_size = 250

# Linear BIC segmentation parameters.
# A positive delta-BIC value indicates that two separate models are needed for
# the adjacent segments. Here we define the thresholds to sweep, as
# (start, stop, number of values) for np.linspace.
li_bic_p_start = .2
li_bic_p_stop = 2.0
li_bic_p_num = 10

# BIC HAC thresholds for merging clusters, as (start, stop, number of values)
# for np.linspace.
bic_hac_start = .5
bic_hac_end = 3
bic_hac_num = 10

# hac_iv thresholds, as (start, stop, number of values) for np.linspace.
t_min = -50
t_max = 100
t_num = 15

# Penalty passed to viterbi.viterbi_decoding().
vit_penalty = -250


### Perform three layers of segmentation: an initial segmentation (the complete file as one segment), a Gaussian-divergence-based segmentation, and then a linear BIC segmentation. i-vectors are then extracted and clustered in the code below.

In [None]:
# Build the initial segmentation for `show` from the features and save it.
init_diar = segmentation.init_seg(cep, show)
Diar.write_seg(init_seg, init_diar)
# Refine the initial segmentation with window size win_size and save the
# result (the Gaussian-divergence pass, per the section heading).
gd_diar = segmentation.segmentation(cep, init_diar, win_size)
Diar.write_seg(gd_seg, gd_diar)

In [None]:
# TODO: Add BIC clustering, then add more thresholds for i-vector scoring. Store files in multiple formats.
# TODO: Make the code multi-threaded and scalable for GPU.

In [None]:
# Sweep three nested thresholds (linear BIC -> BIC HAC -> i-vector HAC) and
# write one segmentation file per combination at every stage.
model_iv = ModelIV(model_fn)
# Number of linear-BIC thresholds whose processing raised an exception.
counter = 0
# The context manager closes error.log even when the cell is interrupted
# (KeyboardInterrupt is not caught by `except Exception` below).
with open('error.log', 'a+') as f:
    for t1 in np.linspace(li_bic_p_start, li_bic_p_stop, li_bic_p_num):
        try:
            # Linear BIC segmentation for this threshold.
            bicl_diar = segmentation.bic_linear(cep, gd_diar, t1, sr=False)
            Diar.write_seg(linear_bic_seg.format(t1), bicl_diar)

            # BIC HAC
            for bic_value in np.linspace(bic_hac_start, bic_hac_end, bic_hac_num):
                bic = hac_bic.HAC_BIC(cep, bicl_diar, bic_value, sr=False)
                bic_hac_diar = bic.perform(to_the_end=True)
                Diar.write_seg(bic_hac_seg.format(t1, bic_value), bic_hac_diar)
                # NOTE(review): this result is overwritten in the innermost
                # loop before it is ever read. The call is kept in case
                # viterbi_decoding has side effects on bic_hac_diar -- confirm
                # and drop it if it has none.
                vit_diar = viterbi.viterbi_decoding(cep, bic_hac_diar, vit_penalty)

                # Extracting features per speaker
                speaker_mfcc_fn = mfcc_m_speaker.format(t1, bic_value)
                fe = get_feature_extractor(input_show, type_feature_extractor='sid')
                idmap_bic = fe.save_multispeakers(
                    bic_hac_diar.id_map(),
                    output_feature_filename=speaker_mfcc_fn,
                    keep_all=False,
                )

                # Training i-vectors
                fs = get_feature_server(speaker_mfcc_fn, 'sid')
                model_iv.train(fs, idmap_bic)
                # Using PLDA to gather scores; they are written to score_fn
                # and read back as a Scores object.
                distance = model_iv.score_plda_slow()
                distance.write(score_fn)
                scores = Scores(scores_file_name=score_fn)
                #cep_vit, _ = fs.load(input_show, input_feature_filename=mfcc_m_speaker.format(t1, bic_value))

                # Using AHC on the calculated scores
                for hac_value in np.linspace(t_min, t_max, t_num):
                    diar_iv, _, _ = hac_iv(bic_hac_diar, scores, threshold=hac_value)
                    Diar.write_seg(hac_iv_seg.format(t1, bic_value, hac_value), diar_iv)

                    # Viterbi resegmentation
                    vit_diar = viterbi.viterbi_decoding(cep, diar_iv, vit_penalty)
                    Diar.write_seg(results_vit_seg.format(t1, bic_value, hac_value), vit_diar)

        except Exception as e:
            # Best effort: a failure abandons the remaining combinations of
            # this linear-BIC threshold and the sweep moves on to the next one.
            traceback.print_exc()
            counter += 1
            # One entry per line, tagged with the failing threshold; repr()
            # keeps the exception type, which str() drops.
            f.write('li_bic threshold {:.2f}: {!r}\n'.format(t1, e))
            f.flush()

In [None]:
# Number of linear-BIC thresholds whose run raised an exception in the sweep.
counter

### Compute DER

In [None]:
import pandas as pd
from pyannote.core import PYANNOTE_URI, PYANNOTE_SEGMENT, PYANNOTE_LABEL, PYANNOTE_TRACK
from pyannote.core import Annotation, Segment

In [None]:
def mdtm_convert_to_annote(filename=None, flag='seg', rate=.1):
    """Read a diarization file with s4d and convert it to a pyannote Annotation.

    Parameters
    ----------
    filename : str
        Path of the file to read.
    flag : {'seg', 'mdtm'}
        File format: 'seg' is read with ``Diar.read_seg``, 'mdtm' with
        ``Diar.read_mdtm``.
    rate : float, optional
        Factor applied to each row's ``start`` and ``stop`` before they are
        used as the bounds of a pyannote ``Segment``. Defaults to 0.1, the
        value that used to be hard-coded.
        NOTE(review): if the Diar times are 10 ms frames, 0.01 would be the
        factor that yields seconds -- confirm against the s4d version in use.

    Returns
    -------
    Annotation
        One entry per row, keyed by ``(Segment(start, stop), show)`` and
        labelled with the row's ``cluster``.

    Raises
    ------
    ValueError
        If ``flag`` is neither 'seg' nor 'mdtm'.
    """
    # Validate first: previously an unknown flag left the table unbound and
    # surfaced later as an UnboundLocalError.
    if flag not in ('seg', 'mdtm'):
        raise ValueError(
            "unsupported flag {!r}; expected 'seg' or 'mdtm'".format(flag))

    if flag == 'seg':
        diar = Diar.read_seg(filename)
    else:
        diar = Diar.read_mdtm(filename)

    annotation = Annotation()
    for row in diar:
        segment = Segment(row['start'] * rate, row['stop'] * rate)
        # The show name is used as the track name (kept in its own local, so
        # the `filename` argument is no longer overwritten).
        track = row['show']
        annotation[segment, track] = row['cluster']
    return annotation

In [None]:
from pyannote.metrics.diarization import DiarizationErrorRate
#result_dir = os.path.join(wdir, results_vit_dir)
result_dir = results_vit_dir
# Sorted so that `results` has a reproducible order (os.listdir order is
# arbitrary); restricted to '.seg' so stray entries in the directory are not
# parsed as segmentations.
files = sorted(name for name in os.listdir(result_dir) if name.endswith('.seg'))
# Reference annotation, read in MDTM format from the file 'es2003a'.
ref = mdtm_convert_to_annote('es2003a', 'mdtm')
# DER of every hypothesis file, in the order of `files` ...
results = []
# ... and the same values keyed by file name, so that the best score can be
# traced back to the thresholds encoded in the name, e.g.
# min(der_by_file, key=der_by_file.get).
der_by_file = {}
for file_name in files:
    result_file = os.path.join(result_dir, file_name)
    hyp = mdtm_convert_to_annote(result_file, 'seg')
    # A fresh metric per file, so nothing is accumulated across hypotheses.
    diarizationErrorRate = DiarizationErrorRate(skip_overlap=True, collar=.25)
    # NOTE(review): 1139 is presumably the length of the recording in the
    # same time unit as the annotations -- confirm.
    der = diarizationErrorRate(ref, hyp, uem=Segment(0, 1139))
    #print("DER = {0:.3f}".format(der))
    results.append(der)
    der_by_file[file_name] = der

In [None]:
# Results for feature type basic with feature type SID. 4000 experiments
# Lowest DER among all scored hypothesis files.
min(results)

In [None]:
# Results for feature type when SID. 1640 experiments
# Lowest DER among all scored hypothesis files.
min(results)

In [None]:
# Perfect DER
# Sanity check: score the reference against itself with default metric
# settings; this rebinds `diarizationErrorRate` and `der`.
diarizationErrorRate = DiarizationErrorRate()
der = diarizationErrorRate(ref, ref, uem=Segment(0, 1139))
print("DER = {0:.3f}".format(der))
