# preprocess

In [1]:
%matplotlib inline

import librosa
import matplotlib.pyplot as plt
import librosa.display
import numpy as np
import torch
import os
import glob
import pickle
import copy
import random
import time
import traceback
from multiprocessing import Process

In [2]:
# FFT module handle obtained from librosa; `VoxIterableDataset._add_rebverb` uses its
# rfft/irfft functions for block-wise convolution.
fft = librosa.get_fftlib()
class VoxIterableDataset(object):
    def __init__(self, data_dir_dict, data_len_dict, config):        
        with open(data_dir_dict['spk2utt_train_dict'], 'rb') as handle:
            self.spk2utt_train_dict = pickle.load(handle)
        with open(data_dir_dict['music_dict'], 'rb') as handle:
            self.music_dict = pickle.load(handle)
        with open(data_dir_dict['noise_dict'], 'rb') as handle:
            self.noise_dict = pickle.load(handle)
        with open(data_dir_dict['babble_dict'], 'rb') as handle:
            self.babble_dict = pickle.load(handle)
        with open(data_dir_dict['rir_dict'], 'rb') as handle:
            self.rir_dict = pickle.load(handle)
            
        with open(data_len_dict['spk2utt_train_len'], 'rb') as handle:
            self.spk2utt_train_len = pickle.load(handle)
        with open(data_len_dict['music_len'], 'rb') as handle:
            self.music_len = pickle.load(handle)
        with open(data_len_dict['noise_len'], 'rb') as handle:
            self.noise_len = pickle.load(handle)
        with open(data_len_dict['babble_len'], 'rb') as handle:
            self.babble_len = pickle.load(handle)
        
        
        self.random_spkrs_batchlist = None
        self.ramdom_batch_len = None
        self.random_noise_type = None
        
        
        self.possible_babble_num = [3, 4, 5, 6, 7]
        self.possible_babble_snr = [13, 15, 17, 20]
        self.possible_noise_snr = [0, 5, 10, 15]
        self.possible_music_snr = [5, 8, 10, 15]
        
        self.sr = config['sr']
        self.repeats = config['repeats']
        self.batch_size = config['batch_size']
        self.extended_prefectch = config['extended_prefectch']
        
        self.mfcc_dim = 30
        
        # Auxiliary paras
        self.multi_read_count = 0
        self.preload_mem = False
        
        
        
    def __iter__(self):
        """Return the dataset itself; batches are produced by `__next__`."""
        return self
    
    def __next__(self):
        assert len(self.ramdom_batch_len) == len(self.random_spkrs_batchlist)
        try:
            batch_frame_len = self.ramdom_batch_len.pop(0)
            batch_spkrs = self.random_spkrs_batchlist.pop(0)
            batch_noise_type = self.random_noise_type.pop(0)
            batched_feats = np.zeros([self.batch_size, batch_frame_len, self.mfcc_dim])
            batched_labels = np.zeros(self.batch_size)
            
            for batch_index, (spkr, noise_type) in enumerate(zip(batch_spkrs, batch_noise_type)):
                
                concat_wav, VAD_result = self._colleting_and_slicing(spkr, batch_frame_len,\
                hop_len=160, extended_prefectch=self.extended_prefectch)
            
                
                if noise_type == 0:
                    aug_wav = concat_wav
                
                elif noise_type == 1:
                    aug_wav = self._add_rebverb(concat_wav)
                   
                elif noise_type == 2:
                    aug_wav = self._add_noise(concat_wav)
                    
                elif noise_type == 3:
                    aug_wav = self._add_music(concat_wav)
                  
                elif noise_type == 4:
                    aug_wav = self._add_babble(concat_wav)
             
                else:
                    raise NotImplementedError
                    
            
                single_feats = librosa.feature.mfcc(y=aug_wav, sr=self.sr, n_mfcc=30, \
                dct_type=2, n_fft=512, hop_length=160, \
                win_length=None, window='hann', power=2.0, \
                center=True, pad_mode='reflect', n_mels=30, \
                fmin=20, fmax=7600)
                # Note single_feats needs transpose
                out_feats = self._CMVN(single_feats.T, cmn_window = 300, normalize_variance = False)
                # Apply VAD
                assert out_feats.shape[0] == VAD_result.shape[0]
                out_feats = out_feats[VAD_result.astype(np.bool)]
                batched_feats[batch_index] = out_feats[:batch_frame_len]
                batched_labels[batch_index] = spkr
                
            return batched_feats, batched_labels
        
        except IndexError:
            raise StopIteration

    def process_one_utt(self, utt_dir):
        """Extract VAD-filtered, mean-normalised MFCCs for one audio file.

        Args:
            utt_dir: path of the audio file, loaded at `self.sr`.

        Returns:
            Array of shape (1, voiced_frames, 30), or None when any step fails
            (the traceback is printed and the error is swallowed, so callers
            must check for None).
        """
        try:
            concat_wav, _ = librosa.load(utt_dir, sr=self.sr)

            VAD_result = self._VAD_detection(concat_wav)

            single_feats = librosa.feature.mfcc(
                y=concat_wav, sr=self.sr, n_mfcc=30,
                dct_type=2, n_fft=512, hop_length=160,
                win_length=None, window='hann', power=2.0,
                center=True, pad_mode='reflect', n_mels=30,
                fmin=20, fmax=7600)
            # librosa returns (n_mfcc, frames); _CMVN expects (frames, dim).
            out_feats = self._CMVN(single_feats.T, cmn_window=300, normalize_variance=False)
            # Apply VAD
            assert out_feats.shape[0] == VAD_result.shape[0]
            # Builtin bool: the np.bool alias was removed in NumPy 1.24, which made this
            # line raise AttributeError and the method silently return None.
            out_feats = out_feats[VAD_result.astype(bool)]

            # Add a leading batch axis.
            batched_feats = out_feats[None, :, :]

            return batched_feats

        except Exception:
            traceback.print_exc()

    def process_one_utt_noVAD(self, utt_dir):
        """Extract mean-normalised MFCCs for one audio file without VAD filtering.

        Args:
            utt_dir: path of the audio file, loaded at `self.sr`.

        Returns:
            Array of shape (1, frames, 30), or None when any step fails (the
            traceback is printed and the error is swallowed).
        """
        try:
            waveform, _ = librosa.load(utt_dir, sr=self.sr)

            mfcc_options = dict(
                n_mfcc=30, dct_type=2, n_fft=512, hop_length=160,
                win_length=None, window='hann', power=2.0,
                center=True, pad_mode='reflect', n_mels=30,
                fmin=20, fmax=7600,
            )
            mfcc = librosa.feature.mfcc(y=waveform, sr=self.sr, **mfcc_options)

            # librosa returns (n_mfcc, frames); _CMVN expects (frames, dim).
            normalised = self._CMVN(mfcc.T, cmn_window=300, normalize_variance=False)

            # Add a leading batch axis.
            return normalised[None, :, :]

        except Exception:
            traceback.print_exc()
    
    def noise_data_preload(self):
        print('preloading music_dict')
        for count, i in enumerate(self.music_dict):
            _, _ = librosa.load(self.music_dict[i], sr=self.sr)
            if (count+1)%100 == 0:
                print(count+1)
        print('preloading noise_dict')        
        for count, i in enumerate(self.noise_dict):
            _, _ = librosa.load(self.noise_dict[i], sr=self.sr)
            if (count+1)%100 == 0:
                print(count+1)
        print('preloading babble_dict')        
        for count, i in enumerate(self.babble_dict):
            _, _ = librosa.load(self.babble_dict[i], sr=self.sr)
            if (count+1)%100 == 0:
                print(count+1)
    
    def noise_data_preload2mem(self):
        print('preloading to memory')
        
        self.music_preload_dict = {}
        self.noise_preload_dict = {}
        self.babble_preload_dict = {}
        self.preload_mem = True
        print('preloading music_dict')
        for count, i in enumerate(self.music_dict):
            self.music_preload_dict[i], _ = librosa.load(self.music_dict[i], sr=self.sr)
            if (count+1)%100 == 0:
                print(count+1)
        print('preloading noise_dict')        
        for count, i in enumerate(self.noise_dict):
            self.noise_preload_dict[i], _ = librosa.load(self.noise_dict[i], sr=self.sr)
            if (count+1)%100 == 0:
                print(count+1)
        print('preloading babble_dict')        
        for count, i in enumerate(self.babble_dict):
            self.babble_preload_dict[i], _ = librosa.load(self.babble_dict[i], sr=self.sr)
            if (count+1)%100 == 0:
                print(count+1)       
        
        
    def get_random_list(self):
        spkrs_list = self.repeats * list(self.spk2utt_train_dict.keys())
        random.shuffle(spkrs_list)
        len_spkrs_list = len(spkrs_list)
        self.random_spkrs_batchlist = [spkrs_list[i*self.batch_size:i*self.batch_size+self.batch_size]\
        for i in range(len_spkrs_list // self.batch_size)]
        
        self.ramdom_batch_len = [random.randint(200, 400) for i in range(len_spkrs_list // self.batch_size)]
        
        noise_type_list = [i%5 for i in range(len_spkrs_list)]

        random.shuffle(noise_type_list)
        self.random_noise_type = [noise_type_list[i*self.batch_size:i*self.batch_size+self.batch_size]\
        for i in range(len_spkrs_list // self.batch_size)]
        
        assert len(self.random_spkrs_batchlist) == len(self.ramdom_batch_len)\
        == len(self.random_noise_type)
        
    def _colleting_and_slicing(self, spkr, batch_frame_len, hop_len=160, extended_prefectch=2.0):
        """Read a random excerpt of one speaker that has enough voiced frames.

        A random utterance of `spkr` is picked and an excerpt of
        `(batch_frame_len - 1) * hop_len` samples plus `extended_prefectch`
        seconds is loaded at a random offset. This repeats with a fresh pick
        until the VAD flags at least `batch_frame_len` frames.

        Returns:
            (waveform, vad_flags) for the accepted excerpt.
        """
        least_wav_len = (batch_frame_len - 1) * hop_len
        concat_utt = np.zeros(0)
        voiced_frames = 0

        # Number of read attempts; more than one bumps multi_read_count.
        attempts = 0

        while voiced_frames < batch_frame_len:
            utt_dir = self._get_random_spk_utt(spkr, self.spk2utt_train_dict)
            utt_len = self.spk2utt_train_len[utt_dir]
            wanted_samples = least_wav_len + extended_prefectch * self.sr
            off = self._get_random_offset(wanted_samples, utt_len) / self.sr
            dur = least_wav_len / self.sr + extended_prefectch

            utt_part, _ = librosa.load(utt_dir, sr=self.sr, offset=off, duration=dur)

            # Each attempt starts from an empty buffer, so only the latest excerpt is kept.
            concat_utt = np.append(np.zeros(0), utt_part)
            detected_frames = self._VAD_detection(concat_utt)
            voiced_frames = np.sum(detected_frames)

            attempts += 1

        if attempts > 1:
            self.multi_read_count += 1

        return concat_utt, detected_frames
    
    def _add_rebverb(self, in_wav):
        """Convolve `in_wav` with a randomly chosen room impulse response.

        The convolution is done block-wise in the frequency domain
        (overlap-add) using the module-level `fft` handle. The result is
        shifted by the index of the impulse response's maximum, cut back to the
        input length and rescaled so its mean power equals that of the input.

        Args:
            in_wav: 1-D waveform array.

        Returns:
            Reverberated waveform with the same length as `in_wav`.
        """
        power_before_reverb = in_wav.dot(in_wav) / len(in_wav)
        signal = in_wav
        filter_dir = self._get_random_noise(self.rir_dict)
        # Named `rir` rather than `filter` to avoid shadowing the builtin.
        rir, _ = librosa.load(filter_dir, sr=self.sr)

        signal_length = len(signal)
        filter_length = len(rir)
        output_length = signal_length + filter_length - 1
        output = np.zeros(output_length)

        # Power-of-two FFT size of at least 4x the filter length.
        # Builtin int: the np.int alias was removed in NumPy 1.24.
        fft_length = 2 ** int(np.ceil(np.log2(4 * filter_length)))
        block_length = fft_length - filter_length + 1

        filter_padded = np.zeros(fft_length)
        filter_padded[0:filter_length] = rir
        filter_spectrum = fft.rfft(filter_padded)

        for i in range(signal_length // block_length + 1):
            start = i * block_length
            process_length = min(block_length, signal_length - start)
            block = np.zeros(fft_length)
            block[0:process_length] = signal[start:start + process_length]

            convolved = fft.irfft(filter_spectrum * fft.rfft(block), n=fft_length)

            # Overlap-add, truncating the last block at the end of the output buffer.
            stop = min(start + fft_length, output_length)
            output[start:stop] += convolved[:stop - start]

        # shift with max index of filter
        shift_index = np.argmax(rir)

        final_out = output[shift_index:shift_index + signal_length]
        power_after_reverb = final_out.dot(final_out) / len(final_out)
        return np.sqrt(power_before_reverb / power_after_reverb) * final_out
    
    def _add_noise(self, in_wav):
        power_before_reverb = in_wav.dot(in_wav) / len(in_wav)
        shift_index = 0
        signal = np.zeros(len(in_wav))
        signal[:] = in_wav[:]
        
        signal_len = len(signal)
        total_noise_len = 0
        signal_off = 0
        while total_noise_len < signal_len:
            
            noise_dir, noise_index = self._get_random_noise(self.noise_dict, return_index=True)
            noise_len = self.noise_len[noise_index]
            if noise_len > signal_len:
                noise_off = self._get_random_offset(signal_len, noise_len)
                total_noise_len += signal_len
                if self.preload_mem:
                    noise = self.noise_preload_dict[noise_index][noise_off:noise_off+signal_len]
                else:
                    noise, _ = librosa.load(noise_dir, sr=self.sr, offset=noise_off/self.sr,\
                    duration=signal_len/self.sr)
                
            else:
                total_noise_len += noise_len
                if self.preload_mem:
                    noise = self.noise_preload_dict[noise_index]
                else:
                    noise, _ = librosa.load(noise_dir, sr=self.sr)
                
            snr_db = self.possible_noise_snr[random.randint(0, len(self.possible_noise_snr)-1)]
        
            signal = self._add_db(signal, noise, signal_off, snr_db, power_before_reverb)
            
            signal_off += len(noise)
        
        output = signal
        final_out = output[shift_index:shift_index+signal_len]
        power_after_reverb = final_out.dot(final_out) / len(final_out)
        final_out = np.sqrt(power_before_reverb/power_after_reverb) * final_out
        out_wav = final_out
        
        return out_wav
    
    def _add_music(self, in_wav):
        power_before_reverb = in_wav.dot(in_wav) / len(in_wav)
        shift_index = 0
        signal = np.zeros(len(in_wav))
        signal[:] = in_wav[:]
        
        signal_len = len(signal)
        total_noise_len = 0
        signal_off = 0
        while total_noise_len < signal_len:
            
            noise_dir, noise_index = self._get_random_noise(self.music_dict, return_index=True)
            noise_len = self.music_len[noise_index]
            if noise_len > signal_len:
                noise_off = self._get_random_offset(signal_len, noise_len)
                total_noise_len += signal_len
                if self.preload_mem:
                    noise = self.music_preload_dict[noise_index][noise_off:noise_off+signal_len]
                else:
                    noise, _ = librosa.load(noise_dir, sr=self.sr, offset=noise_off/self.sr,\
                    duration=signal_len/self.sr)
            else:
                total_noise_len += noise_len
                if self.preload_mem:
                    noise = self.music_preload_dict[noise_index]
                else:
                    noise, _ = librosa.load(noise_dir, sr=self.sr)
                
            snr_db = self.possible_music_snr[random.randint(0, len(self.possible_music_snr)-1)]
        
            signal = self._add_db(signal, noise, signal_off, snr_db, power_before_reverb)
            
            signal_off += len(noise)
        
        output = signal
        final_out = output[shift_index:shift_index+signal_len]
        power_after_reverb = final_out.dot(final_out) / len(final_out)
        final_out = np.sqrt(power_before_reverb/power_after_reverb) * final_out
        out_wav = final_out
        
        return out_wav
    
    def _add_babble(self, in_wav):
        power_before_reverb = in_wav.dot(in_wav) / len(in_wav)
        shift_index = 0
        signal = np.zeros(len(in_wav))
        signal[:] = in_wav[:]
        
        signal_len = len(signal)
        signal_off = 0
        bg_spks_num = self.possible_babble_num[random.randint(0, len(self.possible_babble_num)-1)]    
        for _ in range(bg_spks_num):            
            noise_dir, noise_index = self._get_random_noise(self.babble_dict, return_index=True)
            noise_len = self.babble_len[noise_index]
            if noise_len > signal_len:
                noise_off = self._get_random_offset(signal_len, noise_len)
                if self.preload_mem:
                    noise = self.babble_preload_dict[noise_index][noise_off:noise_off+signal_len]
                else:
                    noise, _ = librosa.load(noise_dir, sr=self.sr, offset=noise_off/self.sr,\
                    duration=signal_len/self.sr)
            else:
                if self.preload_mem:
                    noise = self.babble_preload_dict[noise_index]
                else:
                    noise, _ = librosa.load(noise_dir, sr=self.sr)
                
            snr_db = self.possible_babble_snr[random.randint(0, len(self.possible_babble_snr)-1)]
        
            signal = self._add_db(signal, noise, signal_off, snr_db, power_before_reverb)
            
        output = signal
        final_out = output[shift_index:shift_index+signal_len]
        power_after_reverb = final_out.dot(final_out) / len(final_out)
        final_out = np.sqrt(power_before_reverb/power_after_reverb) * final_out
        out_wav = final_out
        
        return out_wav
    
    def _add_db(self, in_wav, noise, signal_off, snr_db, power_before_reverb):
        signal = np.zeros(len(in_wav))
        signal[:] = in_wav[:]

        noise_power = noise.dot(noise) / len(noise)
        scale_factor = np.sqrt(10**(-snr_db / 10) * power_before_reverb / noise_power)
        noise = scale_factor * noise

        add_length = min(len(noise), len(signal)-signal_off)
        signal[signal_off:signal_off+add_length] += noise[:add_length]
        out_wav = signal      
        
        return out_wav
    
    def _CMVN(self, in_feat, cmn_window = 300, normalize_variance = False):             
        """Sliding-window mean (and optionally variance) normalisation.

        For every frame t the mean over a window of up to `cmn_window` frames
        around t is subtracted. Near the edges the window is shifted so it
        stays inside the sequence; for sequences shorter than the window it
        covers all frames. Window sums are updated incrementally from one frame
        to the next.

        Args:
            in_feat: 2-D array indexed as (frame, feature dimension).
            cmn_window: window length in frames.
            normalize_variance: when True, also divide by the window's standard
                deviation (floored via a variance minimum of 1e-10).

        Returns:
            New array of shape (num_frames, dim) with the normalised features.
        """
        num_frames = in_feat.shape[0]
        dim = in_feat.shape[1]
        # -1 marks "no previous window": the first frame computes the sums from scratch.
        last_window_start = -1
        last_window_end = -1
        cur_sum = np.zeros(dim)
        cur_sumsq = np.zeros(dim)

        out_feat = np.zeros([num_frames, dim])

        for t in range(num_frames):
            window_start = 0
            window_end = 0

            # Window nominally starts half a window before t.
            window_start = t - int(cmn_window / 2)
            window_end = window_start + cmn_window

            # Shift the window right if it starts before the first frame.
            if (window_start < 0):
                window_end -= window_start
                window_start = 0

            # Shift the window left if it runs past the last frame.
            if (window_end > num_frames):
                window_start -= (window_end - num_frames)
                window_end = num_frames
                if (window_start < 0):
                    window_start = 0

            if (last_window_start == -1):
                # First frame: compute the window sums directly.
                input_part = in_feat[window_start:window_end]
                cur_sum = np.sum(input_part, axis=0, keepdims=False)
                if normalize_variance:
                    cur_sumsq = np.sum(input_part**2, axis=0, keepdims=False)
            else:
                # Later frames: drop the frame that left the window ...
                if (window_start > last_window_start):
                    frame_to_remove = in_feat[last_window_start]
                    cur_sum -= frame_to_remove
                    if normalize_variance:
                        cur_sumsq -= frame_to_remove**2

                # ... and add the frame that entered it.
                if (window_end > last_window_end):
                    frame_to_add = in_feat[last_window_end]
                    cur_sum += frame_to_add
                    if normalize_variance:
                        cur_sumsq += frame_to_add**2

            window_frames = window_end - window_start
            last_window_start = window_start
            last_window_end = window_end

            # Subtract the window mean.
            out_feat[t] = in_feat[t] - (1.0 / window_frames) * cur_sum


            if normalize_variance:
                if (window_frames == 1):
                    # A one-frame window has no spread; the output is zeroed.
                    out_feat[t] = 0.0
                else:
                    # var = E[x^2] - (E[x])^2, floored to avoid dividing by ~0.
                    variance = (1.0 / window_frames) * cur_sumsq - (1.0 / window_frames**2) * cur_sum**2
                    variance = np.maximum(1.0e-10, variance)
                    out_feat[t] /= variance**(0.5)
                    
        return out_feat

    def _get_random_noise(self, noise_dict, return_index=False):
        dict_len = len(noise_dict)
        i = random.randint(0, dict_len-1)
        noise_dir = noise_dict[i]
        
        if return_index:
            return noise_dir, i
        else:
            return noise_dir
    
    def _get_random_spk_utt(self, spkr, spk2utt):
        this_utts = spk2utt[spkr]
        this_num_utts = len(this_utts)
        i = random.randint(0, this_num_utts-1)
        utt_dir = this_utts[i]
        return utt_dir

    def _get_random_offset(self, expected_length, utt_len):
        if expected_length > utt_len:
            return 0
        
        free_length = utt_len - expected_length
        offset = random.randint(0, free_length)
        return offset
        
    @property
    def _VAD_config(self):
        vad_energy_threshold = -3.0
        vad_energy_mean_scale = 1.0
        vad_frames_context = 0
        vad_proportion_threshold = 0.12
        
        return vad_energy_threshold, vad_energy_mean_scale,\
        vad_frames_context, vad_proportion_threshold
        
        
    def _VAD_detection(self, wav):
        """Energy-based voice activity detection on 512-sample frames, hop 160.

        A frame counts as "high energy" when its log energy exceeds
        `energy_threshold + mean_scale * mean(log energy)`. Frame t is marked
        voiced when the share of high-energy frames within `frames_context`
        frames of t reaches `proportion_threshold`.

        Args:
            wav: 1-D waveform; it is reflect-padded by 256 samples on each side.

        Returns:
            Float array with one 0.0/1.0 flag per frame.

        Raises:
            Exception: when framing yields no frames.
        """
        base_threshold, mean_scale, context, proportion = self._VAD_config

        padded = np.pad(wav, int(512 // 2), mode='reflect')
        frames = librosa.util.frame(padded, frame_length=512, hop_length=160)
        # Floor the frame energy so silent frames do not produce log(0).
        y_log_energy = np.log(np.maximum(np.sum(frames**2, axis=0), 1e-15))

        T = len(y_log_energy)
        output_voiced = np.zeros(T)
        if T == 0:
            raise Exception("zero wave length")

        energy_threshold = base_threshold
        if mean_scale != 0.0:
            assert mean_scale > 0.0
            energy_threshold += mean_scale * np.sum(y_log_energy) / T

        assert context >= 0
        assert 0.0 < proportion < 1.0

        for t in range(T):
            # Context window clipped to the valid frame range.
            lo = max(t - context, 0)
            hi = min(t + context + 1, T)
            den_count = hi - lo
            num_count = int(np.count_nonzero(y_log_energy[lo:hi] > energy_threshold))
            output_voiced[t] = 1.0 if num_count >= den_count * proportion else 0.0

        return output_voiced

# process_train

In [155]:
OPT_INDEX = '/Lun0/zhiyong/dataset'

# val
# Paths of the index files that VoxIterableDataset.__init__ unpickles. The
# 'spk2utt_train_dict' slot points at the 'spk2utt_val_dict' file here.
data_dir_dict = {
    'spk2utt_train_dict': os.path.join(OPT_INDEX, 'spk2utt_val_dict'),
    'music_dict': os.path.join(OPT_INDEX, 'music_dict'),
    'noise_dict': os.path.join(OPT_INDEX, 'noise_dict'),
    'babble_dict': os.path.join(OPT_INDEX, 'babble_dict'),
    'rir_dict': os.path.join(OPT_INDEX, 'rir_dict'),
}

In [21]:
# Paths of the length tables that VoxIterableDataset.__init__ unpickles. The
# 'spk2utt_train_len' slot points at the 'spk2utt_val_len' file here.
data_len_dict = {
    'spk2utt_train_len': os.path.join(OPT_INDEX, 'spk2utt_val_len'),
    'music_len': os.path.join(OPT_INDEX, 'music_len'),
    'noise_len': os.path.join(OPT_INDEX, 'noise_len'),
    'babble_len': os.path.join(OPT_INDEX, 'babble_len'),
}

In [22]:
# Every path directly under the task2_train directory (glob returns them in arbitrary order).
train_list = glob.glob('/Lun0/zhiyong/SdSV_2020_deepmine/task2_train/*')

In [None]:
# Build the utterance-id -> label mapping from the label file.
# The first line is treated as a header and skipped. Fields are split on
# whitespace -- NOTE(review): confirm the file really is whitespace-delimited
# with exactly two columns; any other non-blank line raises ValueError.
train_labels = {}
with open('/Lun0/zhiyong/SdSV_2020_deepmine/task2_enrollment/docs/train_labels.txt', 'r') as f:
    next(f, None)  # skip the header line (no error if the file is empty)
    for line in f:
        fields = line.split()
        if not fields:
            # Ignore blank lines, e.g. a trailing newline at the end of the file.
            continue
        utt, label = fields
        train_labels[utt] = label

In [None]:
len(train_labels)

In [23]:
len(train_list)

110673

In [25]:
# Settings read by VoxIterableDataset.__init__. Only the sample rate is set;
# the batch-related entries are left as None in this cell.
config = {
    'sr': 16000,
    'repeats': None,
    'batch_size': None,
    'extended_prefectch': None,
}

# trial_dict_dir = '/Lun0/zhiyong/dataset/trial_dict'

def trial_data_preload(dataset, i, train_list, train_labels,
                       out_dir='/Lun0/zhiyong/SdSV_2020_deepmine/train_mfcc/'):
    """Extract features for every training utterance and pickle them to disk.

    For each path in `train_list` the features from
    `dataset.process_one_utt` are cast to float16 and stored, together with
    `[label]`, in a file named after the utterance (file name without
    extension) inside `out_dir`. Progress is printed every 1000 utterances.

    Args:
        dataset: object providing `process_one_utt(path)`.
        i: worker index; kept for the existing call signature, not used.
        train_list: iterable of audio file paths.
        train_labels: mapping from utterance name to label.
        out_dir: existing directory that receives the pickled features.
    """
    # `idx` instead of `i`, so the loop no longer clobbers the worker-index parameter.
    for idx, line in enumerate(train_list):
        data = dataset.process_one_utt(line)
        # Strip directory and extension, whatever the extension length.
        utt_label = os.path.splitext(os.path.basename(line))[0]
        if data is None:
            # process_one_utt already printed the traceback and returned None;
            # skip this utterance instead of crashing the worker on None.astype.
            print('skipping %s: feature extraction failed' % line)
        else:
            label = train_labels[utt_label]
            with open(os.path.join(out_dir, utt_label), 'wb') as handle:
                pickle.dump((data.astype(np.float16), [label]), handle)
        if ((idx+1) % 1000) == 0:
            print(idx+1)

# Run the feature dump in a single worker process and report the wall-clock time.
dataset = VoxIterableDataset(data_dir_dict, data_len_dict, config)

processes = [
    Process(target=trial_data_preload, args=(dataset, i, train_list, train_labels))
    for i in range(1)
]
start_time = time.time()
for worker in processes:
    worker.start()
joined = [p.join() for p in processes]
print(time.time()-start_time)

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000
93000
94000
95000
96000
97000
98000
99000
100000
101000
102000
103000
104000
105000
106000
107000
108000
109000
110000
1779.1428241729736


# process_enr

In [20]:
OPT_INDEX = '/Lun0/zhiyong/dataset'

# val
# Paths of the index files that VoxIterableDataset.__init__ unpickles. The
# 'spk2utt_train_dict' slot points at the 'spk2utt_val_dict' file here.
data_dir_dict = {
    'spk2utt_train_dict': os.path.join(OPT_INDEX, 'spk2utt_val_dict'),
    'music_dict': os.path.join(OPT_INDEX, 'music_dict'),
    'noise_dict': os.path.join(OPT_INDEX, 'noise_dict'),
    'babble_dict': os.path.join(OPT_INDEX, 'babble_dict'),
    'rir_dict': os.path.join(OPT_INDEX, 'rir_dict'),
}

In [21]:
# Paths of the length tables that VoxIterableDataset.__init__ unpickles. The
# 'spk2utt_train_len' slot points at the 'spk2utt_val_len' file here.
data_len_dict = {
    'spk2utt_train_len': os.path.join(OPT_INDEX, 'spk2utt_val_len'),
    'music_len': os.path.join(OPT_INDEX, 'music_len'),
    'noise_len': os.path.join(OPT_INDEX, 'noise_len'),
    'babble_len': os.path.join(OPT_INDEX, 'babble_len'),
}

In [22]:
# Every path directly under the enrollment wav directory (glob returns them in arbitrary order).
enr_list = glob.glob('/Lun0/zhiyong/SdSV_2020_deepmine/task2_enrollment/wav/enrollment/*')

In [23]:
len(enr_list)

110673

In [25]:
# Settings read by VoxIterableDataset.__init__. Only the sample rate is set;
# the batch-related entries are left as None in this cell.
config = {
    'sr': 16000,
    'repeats': None,
    'batch_size': None,
    'extended_prefectch': None,
}

# trial_dict_dir = '/Lun0/zhiyong/dataset/trial_dict'

def trial_data_preload(dataset, i, enr_list,
                       out_dir='/Lun0/zhiyong/SdSV_2020_deepmine/enr_mfcc/'):
    """Extract features for every enrollment utterance and pickle them to disk.

    For each path in `enr_list` the features from `dataset.process_one_utt`
    are cast to float16 and stored, together with `[label]`, in a file named
    `label` inside `out_dir`, where `label` is the file name without its
    extension. Progress is printed every 1000 utterances.

    Args:
        dataset: object providing `process_one_utt(path)`.
        i: worker index; kept for the existing call signature, not used.
        enr_list: iterable of audio file paths.
        out_dir: existing directory that receives the pickled features.
    """
    # `idx` instead of `i`, so the loop no longer clobbers the worker-index parameter.
    for idx, line in enumerate(enr_list):
        data = dataset.process_one_utt(line)
        # Strip directory and extension, whatever the extension length.
        label = os.path.splitext(os.path.basename(line))[0]
        if data is None:
            # process_one_utt already printed the traceback and returned None;
            # skip this utterance instead of crashing the worker on None.astype.
            print('skipping %s: feature extraction failed' % line)
        else:
            with open(os.path.join(out_dir, label), 'wb') as handle:
                pickle.dump((data.astype(np.float16), [label]), handle)
        if ((idx+1) % 1000) == 0:
            print(idx+1)

# Run the feature dump in a single worker process and report the wall-clock time.
dataset = VoxIterableDataset(data_dir_dict, data_len_dict, config)

processes = [
    Process(target=trial_data_preload, args=(dataset, i, enr_list))
    for i in range(1)
]
start_time = time.time()
for worker in processes:
    worker.start()
joined = [p.join() for p in processes]
print(time.time()-start_time)

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000
93000
94000
95000
96000
97000
98000
99000
100000
101000
102000
103000
104000
105000
106000
107000
108000
109000
110000
1779.1428241729736


# process_evl

In [28]:
OPT_INDEX = '/Lun0/zhiyong/dataset'

# val
# Paths of the index files that VoxIterableDataset.__init__ unpickles. The
# 'spk2utt_train_dict' slot points at the 'spk2utt_val_dict' file here.
data_dir_dict = {
    'spk2utt_train_dict': os.path.join(OPT_INDEX, 'spk2utt_val_dict'),
    'music_dict': os.path.join(OPT_INDEX, 'music_dict'),
    'noise_dict': os.path.join(OPT_INDEX, 'noise_dict'),
    'babble_dict': os.path.join(OPT_INDEX, 'babble_dict'),
    'rir_dict': os.path.join(OPT_INDEX, 'rir_dict'),
}

In [30]:
# Paths of the length tables that VoxIterableDataset.__init__ unpickles. The
# 'spk2utt_train_len' slot points at the 'spk2utt_val_len' file here.
data_len_dict = {
    'spk2utt_train_len': os.path.join(OPT_INDEX, 'spk2utt_val_len'),
    'music_len': os.path.join(OPT_INDEX, 'music_len'),
    'noise_len': os.path.join(OPT_INDEX, 'noise_len'),
    'babble_len': os.path.join(OPT_INDEX, 'babble_len'),
}

In [31]:
# Every path directly under the evaluation wav directory (glob returns them in arbitrary order).
evl_list = glob.glob('/Lun0/zhiyong/SdSV_2020_deepmine/evaluation/wav/evaluation/*')

In [32]:
len(evl_list)

69542

In [33]:
# Settings read by VoxIterableDataset.__init__. Only the sample rate is set;
# the batch-related entries are left as None in this cell.
config = {
    'sr': 16000,
    'repeats': None,
    'batch_size': None,
    'extended_prefectch': None,
}

# trial_dict_dir = '/Lun0/zhiyong/dataset/trial_dict'

def trial_data_preload(dataset, i, evl_list,
                       out_dir='/Lun0/zhiyong/SdSV_2020_deepmine/evl_mfcc/'):
    """Extract features for every evaluation utterance and pickle them to disk.

    For each path in `evl_list` the features from `dataset.process_one_utt`
    are cast to float16 and stored, together with `[label]`, in a file named
    `label` inside `out_dir`, where `label` is the file name without its
    extension. Progress is printed every 1000 utterances.

    Args:
        dataset: object providing `process_one_utt(path)`.
        i: worker index; kept for the existing call signature, not used.
        evl_list: iterable of audio file paths.
        out_dir: existing directory that receives the pickled features.
    """
    # `idx` instead of `i`, so the loop no longer clobbers the worker-index parameter.
    for idx, line in enumerate(evl_list):
        data = dataset.process_one_utt(line)
        # Strip directory and extension, whatever the extension length.
        label = os.path.splitext(os.path.basename(line))[0]
        if data is None:
            # process_one_utt already printed the traceback and returned None;
            # skip this utterance instead of crashing the worker on None.astype.
            print('skipping %s: feature extraction failed' % line)
        else:
            with open(os.path.join(out_dir, label), 'wb') as handle:
                pickle.dump((data.astype(np.float16), [label]), handle)
        if ((idx+1) % 1000) == 0:
            print(idx+1)

# Run the feature dump in a single worker process and report the wall-clock time.
dataset = VoxIterableDataset(data_dir_dict, data_len_dict, config)

processes = [
    Process(target=trial_data_preload, args=(dataset, i, evl_list))
    for i in range(1)
]
start_time = time.time()
for worker in processes:
    worker.start()
joined = [p.join() for p in processes]
print(time.time()-start_time)

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
502.18626618385315


# Make train index

In [158]:
# Settings for the train feature index: feature directory, expected file count
# and the CSV file that will list every feature path.
train_data_dir = '/Lun0/zhiyong/SdSV_2020_deepmine/train_mfcc'
output = '/Lun0/zhiyong/SdSV_2020_deepmine/train_mfcc.csv'
expected_len = 85764
workers = 1
single_worker_len = expected_len // workers

In [159]:
# Every path directly under the train_mfcc directory (glob returns them in arbitrary order).
train_mfcc_list = glob.glob('/Lun0/zhiyong/SdSV_2020_deepmine/train_mfcc/*')

In [160]:
# Sanity check: the number of feature files found must equal the expected count.
assert expected_len == len(train_mfcc_list)

In [161]:
# Write one feature-file path per line to the index file, counting the lines written.
count = 0
with open(output, 'w') as f:
    for path in train_mfcc_list:
        assert os.path.isfile(path)
        f.write('%s\n' % path)
        count += 1

In [162]:
count

85764

# Make enr index

In [34]:
# Settings for the enrollment feature index: feature directory, expected file
# count and the CSV file that will list every feature path.
# (The variable is still called `train_data_dir` although it points at enr_mfcc.)
train_data_dir = '/Lun0/zhiyong/SdSV_2020_deepmine/enr_mfcc'
output = '/Lun0/zhiyong/SdSV_2020_deepmine/enr_mfcc.csv'
expected_len = 110673
workers = 1
single_worker_len = expected_len // workers

In [35]:
# Every path directly under the enr_mfcc directory (glob returns them in arbitrary order).
enr_mfcc_list = glob.glob('/Lun0/zhiyong/SdSV_2020_deepmine/enr_mfcc/*')

In [36]:
# Sanity check: the number of feature files found must equal the expected count.
assert expected_len == len(enr_mfcc_list)

In [37]:
# Write one feature-file path per line to the index file, counting the lines written.
count = 0
with open(output, 'w') as f:
    for path in enr_mfcc_list:
        assert os.path.isfile(path)
        f.write('%s\n' % path)
        count += 1

In [38]:
count

110673

# Make evl index

In [39]:
# Settings for the evaluation feature index: feature directory, expected file
# count and the CSV file that will list every feature path.
# (The variable is still called `train_data_dir` although it points at evl_mfcc.)
train_data_dir = '/Lun0/zhiyong/SdSV_2020_deepmine/evl_mfcc'
output = '/Lun0/zhiyong/SdSV_2020_deepmine/evl_mfcc.csv'
expected_len = 69542
workers = 1
single_worker_len = expected_len // workers

In [40]:
# Every path directly under the evl_mfcc directory (glob returns them in arbitrary order).
evl_mfcc_list = glob.glob('/Lun0/zhiyong/SdSV_2020_deepmine/evl_mfcc/*')

In [41]:
# Sanity check: the number of feature files found must equal the expected count.
assert expected_len == len(evl_mfcc_list)

In [42]:
# Write one feature-file path per line to the index file, counting the lines written.
count = 0
with open(output, 'w') as f:
    for path in evl_mfcc_list:
        assert os.path.isfile(path)
        f.write('%s\n' % path)
        count += 1

In [43]:
count

69542

# extract feats

In [49]:
import os
import sys
sys.path.append('./train')

import numpy as np
import torch


# import vox_model_bank
from train import train_model_new
from train.read_data import *
from train.my_dataloader import *
# from sklearn.metrics import roc_curve


In [50]:
# Checkpoint file loaded with torch.load inside extract_feature_m; its 'model'
# entry is the state dict.
model_path = '/Lun2/rzz/kaldi-master/egs/zhiyong/sre19/exp/Xvector_SAP_nodilate_1L_long_cosanel_newloader_newdata_2(test)/ckpt/min_eer.model'
# First two arguments passed to train_model_new.get_model in extract_feature_m.
model_id = 'Xvector_SAP_nodilate_1L'
model_metric = 'AM_normfree_softmax_anneal_ce_head'

In [163]:
# Directory that receives every artefact this notebook writes (pickled
# embeddings, PLDA parameters, score files).
component_dir = './sdsvc'
# Output dimension of the LDA transform; also the dimension given to the PLDA
# statistics and the target norm (sqrt(PLDA_DIM)) used for length normalisation.
PLDA_DIM = 370
# File names (inside component_dir) of the individual artefacts.
PLDA_DATA_NAME = 'plda_data'
# NOTE(review): TEST_DATA_NAME is not referenced in the cells visible here.
TEST_DATA_NAME = 'test_data_370'
PLDA_PARA_NAME = 'plda_para_370'
SCORING_PLDA_NAME = 'score_plda_370'
SCORING_COSINE_NAME = 'score_cosine_370'
ENR_DATA_NAME = 'enr_data'
EVL_DATA_NAME = 'evl_data'
TRAIN_DATA_NAME = 'train_data'

In [52]:
# Create the output directory if needed. exist_ok avoids the window between
# the existence check and the creation in which another process could create it.
os.makedirs(component_dir, exist_ok=True)

# GPU multiprocess for train

In [164]:
from multiprocessing import Process, Manager

In [165]:
num_p = 10

In [166]:
# train_list = '/Lun0/zhiyong/dataset/plda_full_data.csv'
train_list = '/Lun0/zhiyong/SdSV_2020_deepmine/train_mfcc.csv'
# train_data = CSVDataSet(train_list)
train_data = PickleDataSet(train_list)

In [167]:
train_data_len = len(train_data)
# Shard size is derived from the real dataset length rather than a hard-coded
# utterance count, so no items are left out if the index ever changes size.
num_per_process = (train_data_len // num_p) + 1

In [168]:
manager = Manager()
class_list_new_m = manager.list()

In [169]:
# Cut the dataset into num_p consecutive index ranges (the last one may be
# shorter) and reserve one empty result slot per worker in the shared list.
data_m = []
for i in range(num_p):
    shard_start = i*num_per_process
    shard_stop = min(shard_start + num_per_process, train_data_len)
    data = torch.utils.data.Subset(train_data, np.arange(shard_start, shard_stop))
    data_m.append(data)
    class_list_new_m.append({})
    print(len(data))

8577
8577
8577
8577
8577
8577
8577
8577
8577
8571


In [170]:
def extract_feature_m(i, train_data, class_list_new_m):
    """Worker entry point: embed one shard of utterances and publish the result.

    Args:
        i: Worker index. Selects the GPU (see below) and the slot of
            ``class_list_new_m`` that this worker overwrites.
        train_data: Dataset shard handed to ``My_DataLoader``. Every item is
            unpacked as ``(batch_x, batch_y)``; ``batch_y[0]`` must be a string,
            and the text before its first '-' is used as the label.
        class_list_new_m: Shared, index-assignable container. On completion
            ``class_list_new_m[i]`` is set to ``{label: ndarray}`` where each
            array stacks that label's embeddings along axis 0.

    Reads the notebook-level names ``model_id``, ``model_metric`` and
    ``model_path``. Items whose forward pass raises are reported and skipped,
    so the result can hold fewer rows than ``train_data``.
    """
    model_settings = {'in_feat': 30, 'emb_size': 512, 'class_num': 7323, 's': 50, 'm': 0.2, 'anneal_steps': 0, 'HistK_len': 0}

    torch.backends.cudnn.benchmark = False
    # Workers 0-25 alternate between GPU 0 and GPU 1; any further worker uses GPU 0.
    if i < 26:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(i%2)
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Only one GPU id is exposed above, so it is addressed as index 0 here
    # (assumes CUDA was not initialised in this process before -- TODO confirm).
    device = torch.device("cuda:0")

    train_dataloader = My_DataLoader(train_data, batch_size=None, shuffle=False, sampler=None,\
    batch_sampler=None, num_workers=8, collate_fn=None,\
    pin_memory=False, drop_last=False, timeout=0,\
    worker_init_fn=None, multiprocessing_context=None)

    model = train_model_new.get_model(model_id, model_metric, None, model_settings, None)
    # Load the checkpoint on CPU first, then move the whole model to the GPU.
    checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'], strict=True)
    model = model.to(device)
    model.eval()

    # label -> list of rows (each `emb[None, :]`). The rows are concatenated
    # once at the end instead of re-allocating a growing array per utterance.
    emb_rows = {}

    for count, (batch_x, batch_y) in enumerate(train_dataloader):
        torch.cuda.empty_cache()
        batch_x = batch_x.to(device)
        label = batch_y[0].split('-')[0]
        # The real label is not fed to the model; a constant placeholder is.
        batch_y = torch.tensor([0]).to(device)
        try:
            with torch.no_grad():
                _, _, emb, _, _ = model(batch_x, batch_y, mod='eval')
        except Exception as err:
            # Best effort: report which item failed and why, then move on.
            # (Previously a bare `except` hid the cause and also trapped
            # KeyboardInterrupt/SystemExit; the tag read 'EER' by mistake.)
            print('Proc', str(i), 'ERR:', label, repr(err))
            continue

        emb = emb.squeeze().detach().cpu().numpy()
        emb_rows.setdefault(label, []).append(emb[None, :])

        if (count+1) % 10000 == 0:
            print('Proc '+ str(i) + ':' + str((count+1) // 10000))

    class_list_new_m[i] = {label: np.concatenate(rows, axis=0) for label, rows in emb_rows.items()}
    # Drop the references before emptying the CUDA cache. Rebinding (instead
    # of `del`) also works when the shard was empty and the loop never ran.
    batch_x = batch_y = None
    del model
    torch.cuda.empty_cache()

In [171]:
# Launch one worker process per shard and wait for all of them to finish.
processes = [Process(target = extract_feature_m, args = (i, data_m[i], class_list_new_m)) for i in range(num_p)]
for p in processes:
    p.start()
joined = [p.join() for p in processes]

# A worker only fills its slot of class_list_new_m as its last step, so one
# that died leaves the empty placeholder dict behind. Fail loudly instead of
# carrying on with missing embeddings.
failed_workers = [idx for idx, p in enumerate(processes) if p.exitcode != 0]
if failed_workers:
    raise RuntimeError('extract_feature_m workers exited abnormally: ' + str(failed_workers))

Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank


In [172]:
# Total number of embedding rows over all workers and labels.
count = sum(len(embs) for worker_dict in class_list_new_m for embs in worker_dict.values())
count

85764

In [173]:
# Merge the per-worker {label: embeddings} dicts into a single dict, starting
# from worker 0's result and appending the other workers' rows in worker order.
# The loop index gets its own name so the utterance total kept in `count` is
# not overwritten by this cell.
class_list_new = class_list_new_m[0]

for worker_idx in range(1, len(class_list_new_m)):
    for this_label, embs in class_list_new_m[worker_idx].items():
        if this_label in class_list_new:
            class_list_new[this_label] = np.concatenate((class_list_new[this_label], embs), axis=0)
        else:
            class_list_new[this_label] = embs

In [174]:
# Total number of embedding rows in the merged dict.
count = sum(len(embs) for embs in class_list_new.values())
count

85764

In [175]:
# Collect every label whose embedding matrix contains a NaN, report it, and
# remove it from the merged dict.
nanlist = [label for label, embs in class_list_new.items() if np.isnan(embs).any()]
for label in nanlist:
    print(label)
for label in nanlist:
    class_list_new.pop(label)

In [176]:
count

85764

In [177]:
len(class_list_new)

588

In [178]:
import pickle
# Pickle the merged {label: embeddings} dict into the component directory.
out = '/'.join((component_dir, TRAIN_DATA_NAME))
with open(out, 'wb') as dump_file:
    pickle.dump(class_list_new, dump_file)

# GPU multiprocess for enr

In [53]:
from multiprocessing import Process, Manager

In [54]:
num_p = 10

In [55]:
# train_list = '/Lun0/zhiyong/dataset/plda_full_data.csv'
train_list = '/Lun0/zhiyong/SdSV_2020_deepmine/enr_mfcc.csv'
# train_data = CSVDataSet(train_list)
train_data = PickleDataSet(train_list)

In [56]:
train_data_len = len(train_data)
# Shard size is derived from the real dataset length rather than a hard-coded
# utterance count, so no items are left out if the index ever changes size.
num_per_process = (train_data_len // num_p) + 1

In [57]:
manager = Manager()
class_list_new_m = manager.list()

In [58]:
# Cut the dataset into num_p consecutive index ranges (the last one may be
# shorter) and reserve one empty result slot per worker in the shared list.
data_m = []
for i in range(num_p):
    shard_start = i*num_per_process
    shard_stop = min(shard_start + num_per_process, train_data_len)
    data = torch.utils.data.Subset(train_data, np.arange(shard_start, shard_stop))
    data_m.append(data)
    class_list_new_m.append({})
    print(len(data))

11068
11068
11068
11068
11068
11068
11068
11068
11068
11061


In [59]:
def extract_feature_m(i, train_data, class_list_new_m):
    """Worker entry point: embed one shard of utterances and publish the result.

    Args:
        i: Worker index. Selects the GPU (see below) and the slot of
            ``class_list_new_m`` that this worker overwrites.
        train_data: Dataset shard handed to ``My_DataLoader``. Every item is
            unpacked as ``(batch_x, batch_y)``; ``batch_y[0]`` must be a string,
            and the text before its first '-' is used as the label.
        class_list_new_m: Shared, index-assignable container. On completion
            ``class_list_new_m[i]`` is set to ``{label: ndarray}`` where each
            array stacks that label's embeddings along axis 0.

    Reads the notebook-level names ``model_id``, ``model_metric`` and
    ``model_path``. Items whose forward pass raises are reported and skipped,
    so the result can hold fewer rows than ``train_data``.
    """
    model_settings = {'in_feat': 30, 'emb_size': 512, 'class_num': 7323, 's': 50, 'm': 0.2, 'anneal_steps': 0, 'HistK_len': 0}

    torch.backends.cudnn.benchmark = False
    # Workers 0-25 alternate between GPU 0 and GPU 1; any further worker uses GPU 0.
    if i < 26:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(i%2)
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Only one GPU id is exposed above, so it is addressed as index 0 here
    # (assumes CUDA was not initialised in this process before -- TODO confirm).
    device = torch.device("cuda:0")

    train_dataloader = My_DataLoader(train_data, batch_size=None, shuffle=False, sampler=None,\
    batch_sampler=None, num_workers=8, collate_fn=None,\
    pin_memory=False, drop_last=False, timeout=0,\
    worker_init_fn=None, multiprocessing_context=None)

    model = train_model_new.get_model(model_id, model_metric, None, model_settings, None)
    # Load the checkpoint on CPU first, then move the whole model to the GPU.
    checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'], strict=True)
    model = model.to(device)
    model.eval()

    # label -> list of rows (each `emb[None, :]`). The rows are concatenated
    # once at the end instead of re-allocating a growing array per utterance.
    emb_rows = {}

    for count, (batch_x, batch_y) in enumerate(train_dataloader):
        torch.cuda.empty_cache()
        batch_x = batch_x.to(device)
        label = batch_y[0].split('-')[0]
        # The real label is not fed to the model; a constant placeholder is.
        batch_y = torch.tensor([0]).to(device)
        try:
            with torch.no_grad():
                _, _, emb, _, _ = model(batch_x, batch_y, mod='eval')
        except Exception as err:
            # Best effort: report which item failed and why, then move on.
            # (Previously a bare `except` hid the cause and also trapped
            # KeyboardInterrupt/SystemExit; the tag read 'EER' by mistake.)
            print('Proc', str(i), 'ERR:', label, repr(err))
            continue

        emb = emb.squeeze().detach().cpu().numpy()
        emb_rows.setdefault(label, []).append(emb[None, :])

        if (count+1) % 10000 == 0:
            print('Proc '+ str(i) + ':' + str((count+1) // 10000))

    class_list_new_m[i] = {label: np.concatenate(rows, axis=0) for label, rows in emb_rows.items()}
    # Drop the references before emptying the CUDA cache. Rebinding (instead
    # of `del`) also works when the shard was empty and the loop never ran.
    batch_x = batch_y = None
    del model
    torch.cuda.empty_cache()

In [60]:
# Launch one worker process per shard and wait for all of them to finish.
processes = [Process(target = extract_feature_m, args = (i, data_m[i], class_list_new_m)) for i in range(num_p)]
for p in processes:
    p.start()
joined = [p.join() for p in processes]

# A worker only fills its slot of class_list_new_m as its last step, so one
# that died leaves the empty placeholder dict behind. Fail loudly instead of
# carrying on with missing embeddings.
failed_workers = [idx for idx, p in enumerate(processes) if p.exitcode != 0]
if failed_workers:
    raise RuntimeError('extract_feature_m workers exited abnormally: ' + str(failed_workers))

Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Proc 8:1
Proc 2:1
Proc 6:1
Proc 0:1
Proc 4:1
Proc 7:1
Proc 1:1
Proc 9:1
Proc 3:1
Proc 5:1


In [61]:
# Total number of embedding rows over all workers and labels.
count = sum(len(embs) for worker_dict in class_list_new_m for embs in worker_dict.values())
count

110673

In [62]:
# Merge the per-worker {label: embeddings} dicts into a single dict, starting
# from worker 0's result and appending the other workers' rows in worker order.
# The loop index gets its own name so the utterance total kept in `count` is
# not overwritten by this cell.
class_list_new = class_list_new_m[0]

for worker_idx in range(1, len(class_list_new_m)):
    for this_label, embs in class_list_new_m[worker_idx].items():
        if this_label in class_list_new:
            class_list_new[this_label] = np.concatenate((class_list_new[this_label], embs), axis=0)
        else:
            class_list_new[this_label] = embs

In [63]:
# Total number of embedding rows in the merged dict.
count = sum(len(embs) for embs in class_list_new.values())
count

110673

In [64]:
# Collect every label whose embedding matrix contains a NaN, report it, and
# remove it from the merged dict.
nanlist = [label for label, embs in class_list_new.items() if np.isnan(embs).any()]
for label in nanlist:
    print(label)
for label in nanlist:
    class_list_new.pop(label)

In [65]:
count

110673

In [66]:
len(class_list_new)

110673

In [68]:
import pickle
# Pickle the merged {label: embeddings} dict into the component directory.
out = '/'.join((component_dir, ENR_DATA_NAME))
with open(out, 'wb') as dump_file:
    pickle.dump(class_list_new, dump_file)

# GPU multiprocess for evl

In [69]:
from multiprocessing import Process, Manager

In [70]:
num_p = 10

In [71]:
# train_list = '/Lun0/zhiyong/dataset/plda_full_data.csv'
train_list = '/Lun0/zhiyong/SdSV_2020_deepmine/evl_mfcc.csv'
# train_data = CSVDataSet(train_list)
train_data = PickleDataSet(train_list)

In [72]:
train_data_len = len(train_data)
# Shard size is derived from the real dataset length rather than a hard-coded
# utterance count, so no items are left out if the index ever changes size.
num_per_process = (train_data_len // num_p) + 1

In [73]:
manager = Manager()
class_list_new_m = manager.list()

In [74]:
# Cut the dataset into num_p consecutive index ranges (the last one may be
# shorter) and reserve one empty result slot per worker in the shared list.
data_m = []
for i in range(num_p):
    shard_start = i*num_per_process
    shard_stop = min(shard_start + num_per_process, train_data_len)
    data = torch.utils.data.Subset(train_data, np.arange(shard_start, shard_stop))
    data_m.append(data)
    class_list_new_m.append({})
    print(len(data))

6955
6955
6955
6955
6955
6955
6955
6955
6955
6947


In [75]:
def extract_feature_m(i, train_data, class_list_new_m):
    """Worker entry point: embed one shard of utterances and publish the result.

    Args:
        i: Worker index. Selects the GPU (see below) and the slot of
            ``class_list_new_m`` that this worker overwrites.
        train_data: Dataset shard handed to ``My_DataLoader``. Every item is
            unpacked as ``(batch_x, batch_y)``; ``batch_y[0]`` must be a string,
            and the text before its first '-' is used as the label.
        class_list_new_m: Shared, index-assignable container. On completion
            ``class_list_new_m[i]`` is set to ``{label: ndarray}`` where each
            array stacks that label's embeddings along axis 0.

    Reads the notebook-level names ``model_id``, ``model_metric`` and
    ``model_path``. Items whose forward pass raises are reported and skipped,
    so the result can hold fewer rows than ``train_data``.
    """
    model_settings = {'in_feat': 30, 'emb_size': 512, 'class_num': 7323, 's': 50, 'm': 0.2, 'anneal_steps': 0, 'HistK_len': 0}

    torch.backends.cudnn.benchmark = False
    # Workers 0-25 alternate between GPU 0 and GPU 1; any further worker uses GPU 0.
    if i < 26:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(i%2)
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Only one GPU id is exposed above, so it is addressed as index 0 here
    # (assumes CUDA was not initialised in this process before -- TODO confirm).
    device = torch.device("cuda:0")

    train_dataloader = My_DataLoader(train_data, batch_size=None, shuffle=False, sampler=None,\
    batch_sampler=None, num_workers=8, collate_fn=None,\
    pin_memory=False, drop_last=False, timeout=0,\
    worker_init_fn=None, multiprocessing_context=None)

    model = train_model_new.get_model(model_id, model_metric, None, model_settings, None)
    # Load the checkpoint on CPU first, then move the whole model to the GPU.
    checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'], strict=True)
    model = model.to(device)
    model.eval()

    # label -> list of rows (each `emb[None, :]`). The rows are concatenated
    # once at the end instead of re-allocating a growing array per utterance.
    emb_rows = {}

    for count, (batch_x, batch_y) in enumerate(train_dataloader):
        torch.cuda.empty_cache()
        batch_x = batch_x.to(device)
        label = batch_y[0].split('-')[0]
        # The real label is not fed to the model; a constant placeholder is.
        batch_y = torch.tensor([0]).to(device)
        try:
            with torch.no_grad():
                _, _, emb, _, _ = model(batch_x, batch_y, mod='eval')
        except Exception as err:
            # Best effort: report which item failed and why, then move on.
            # (Previously a bare `except` hid the cause and also trapped
            # KeyboardInterrupt/SystemExit; the tag read 'EER' by mistake.)
            print('Proc', str(i), 'ERR:', label, repr(err))
            continue

        emb = emb.squeeze().detach().cpu().numpy()
        emb_rows.setdefault(label, []).append(emb[None, :])

        if (count+1) % 10000 == 0:
            print('Proc '+ str(i) + ':' + str((count+1) // 10000))

    class_list_new_m[i] = {label: np.concatenate(rows, axis=0) for label, rows in emb_rows.items()}
    # Drop the references before emptying the CUDA cache. Rebinding (instead
    # of `del`) also works when the shard was empty and the loop never ran.
    batch_x = batch_y = None
    del model
    torch.cuda.empty_cache()

In [76]:
# Launch one worker process per shard and wait for all of them to finish.
processes = [Process(target = extract_feature_m, args = (i, data_m[i], class_list_new_m)) for i in range(num_p)]
for p in processes:
    p.start()
joined = [p.join() for p in processes]

# A worker only fills its slot of class_list_new_m as its last step, so one
# that died leaves the empty placeholder dict behind. Fail loudly instead of
# carrying on with missing embeddings.
failed_workers = [idx for idx, p in enumerate(processes) if p.exitcode != 0]
if failed_workers:
    raise RuntimeError('extract_feature_m workers exited abnormally: ' + str(failed_workers))

Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank


In [77]:
# Total number of embedding rows over all workers and labels.
count = sum(len(embs) for worker_dict in class_list_new_m for embs in worker_dict.values())
count

69542

In [78]:
# Merge the per-worker {label: embeddings} dicts into a single dict, starting
# from worker 0's result and appending the other workers' rows in worker order.
# The loop index gets its own name so the utterance total kept in `count` is
# not overwritten by this cell.
class_list_new = class_list_new_m[0]

for worker_idx in range(1, len(class_list_new_m)):
    for this_label, embs in class_list_new_m[worker_idx].items():
        if this_label in class_list_new:
            class_list_new[this_label] = np.concatenate((class_list_new[this_label], embs), axis=0)
        else:
            class_list_new[this_label] = embs

In [79]:
# Total number of embedding rows in the merged dict.
count = sum(len(embs) for embs in class_list_new.values())
count

69542

In [80]:
# Collect every label whose embedding matrix contains a NaN, report it, and
# remove it from the merged dict.
nanlist = [label for label, embs in class_list_new.items() if np.isnan(embs).any()]
for label in nanlist:
    print(label)
for label in nanlist:
    class_list_new.pop(label)

In [81]:
count

69542

In [82]:
len(class_list_new)

69542

In [83]:
import pickle
# Pickle the merged {label: embeddings} dict into the component directory.
out = '/'.join((component_dir, EVL_DATA_NAME))
with open(out, 'wb') as dump_file:
    pickle.dump(class_list_new, dump_file)

# GPU multiprocess for plda

In [5]:
from multiprocessing import Process, Manager

In [6]:
num_p = 28

In [7]:
train_list = '/Lun0/zhiyong/dataset/plda_full_data.csv'
# train_list = '/Lun0/zhiyong/dataset/plda_full_data_noVAD.csv'
# train_data = CSVDataSet(train_list)
train_data = PickleDataSet(train_list)

In [8]:
train_data_len = len(train_data)
# Shard size is derived from the real dataset length rather than a hard-coded
# utterance count, so no items are left out if the index ever changes size.
num_per_process = (train_data_len // num_p) + 1

In [9]:
manager = Manager()
class_list_new_m = manager.list()

In [10]:
# Cut the dataset into num_p consecutive index ranges (the last one may be
# shorter) and reserve one empty result slot per worker in the shared list.
data_m = []
for i in range(num_p):
    shard_start = i*num_per_process
    shard_stop = min(shard_start + num_per_process, train_data_len)
    data = torch.utils.data.Subset(train_data, np.arange(shard_start, shard_stop))
    data_m.append(data)
    class_list_new_m.append({})
    print(len(data))

45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45604
45580


In [11]:
def extract_feature_m(i, train_data, class_list_new_m):
    """Worker entry point: embed one shard of utterances and publish the result.

    Args:
        i: Worker index. Selects the GPU (see below) and the slot of
            ``class_list_new_m`` that this worker overwrites.
        train_data: Dataset shard handed to ``My_DataLoader``. Every item is
            unpacked as ``(batch_x, batch_y)``; ``batch_y[0]`` must be a string,
            and the text before its first '-' is used as the label.
        class_list_new_m: Shared, index-assignable container. On completion
            ``class_list_new_m[i]`` is set to ``{label: ndarray}`` where each
            array stacks that label's embeddings along axis 0.

    Reads the notebook-level names ``model_id``, ``model_metric`` and
    ``model_path``. Items whose forward pass raises are reported and skipped,
    so the result can hold fewer rows than ``train_data``.
    """
    model_settings = {'in_feat': 30, 'emb_size': 512, 'class_num': 7323, 's': 50, 'm': 0.2, 'anneal_steps': 0, 'HistK_len': 0}

    torch.backends.cudnn.benchmark = False
    # Workers 0-25 alternate between GPU 0 and GPU 1; any further worker uses GPU 0.
    if i < 26:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(i%2)
    else:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # Only one GPU id is exposed above, so it is addressed as index 0 here
    # (assumes CUDA was not initialised in this process before -- TODO confirm).
    device = torch.device("cuda:0")

    train_dataloader = My_DataLoader(train_data, batch_size=None, shuffle=False, sampler=None,\
    batch_sampler=None, num_workers=8, collate_fn=None,\
    pin_memory=False, drop_last=False, timeout=0,\
    worker_init_fn=None, multiprocessing_context=None)

    model = train_model_new.get_model(model_id, model_metric, None, model_settings, None)
    # Load the checkpoint on CPU first, then move the whole model to the GPU.
    checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'], strict=True)
    model = model.to(device)
    model.eval()

    # label -> list of rows (each `emb[None, :]`). The rows are concatenated
    # once at the end instead of re-allocating a growing array per utterance.
    emb_rows = {}

    for count, (batch_x, batch_y) in enumerate(train_dataloader):
        torch.cuda.empty_cache()
        batch_x = batch_x.to(device)
        label = batch_y[0].split('-')[0]
        # The real label is not fed to the model; a constant placeholder is.
        batch_y = torch.tensor([0]).to(device)
        try:
            with torch.no_grad():
                _, _, emb, _, _ = model(batch_x, batch_y, mod='eval')
        except Exception as err:
            # Best effort: report which item failed and why, then move on.
            # (Previously a bare `except` hid the cause and also trapped
            # KeyboardInterrupt/SystemExit; the tag read 'EER' by mistake.)
            print('Proc', str(i), 'ERR:', label, repr(err))
            continue

        emb = emb.squeeze().detach().cpu().numpy()
        emb_rows.setdefault(label, []).append(emb[None, :])

        if (count+1) % 10000 == 0:
            print('Proc '+ str(i) + ':' + str((count+1) // 10000))

    class_list_new_m[i] = {label: np.concatenate(rows, axis=0) for label, rows in emb_rows.items()}
    # Drop the references before emptying the CUDA cache. Rebinding (instead
    # of `del`) also works when the shard was empty and the loop never ran.
    batch_x = batch_y = None
    del model
    torch.cuda.empty_cache()

In [12]:
# Launch one worker process per shard and wait for all of them to finish.
processes = [Process(target = extract_feature_m, args = (i, data_m[i], class_list_new_m)) for i in range(num_p)]
for p in processes:
    p.start()
joined = [p.join() for p in processes]

# A worker only fills its slot of class_list_new_m as its last step, so one
# that died leaves the empty placeholder dict behind. Fail loudly instead of
# carrying on with missing embeddings.
failed_workers = [idx for idx, p in enumerate(processes) if p.exitcode != 0]
if failed_workers:
    raise RuntimeError('extract_feature_m workers exited abnormally: ' + str(failed_workers))

Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Using new training model bank
Proc 27:1
Proc 24:1
Proc 26:1
Proc 22:1
Proc 20:1
Proc 16:1
Proc 10:1
Proc 6:1
Proc 8:1
Proc 18:1
Proc 4:1
Proc 25:1
Proc 2:1
Proc 12:1
Proc 14:1
Proc 21:1
Proc

In [13]:
# Total number of embedding rows over all workers and labels.
count = sum(len(embs) for worker_dict in class_list_new_m for embs in worker_dict.values())
count

1276888

In [14]:
# Merge the per-worker {label: embeddings} dicts into a single dict, starting
# from worker 0's result and appending the other workers' rows in worker order.
# The loop index gets its own name so the utterance total kept in `count` is
# not overwritten by this cell.
class_list_new = class_list_new_m[0]

for worker_idx in range(1, len(class_list_new_m)):
    for this_label, embs in class_list_new_m[worker_idx].items():
        if this_label in class_list_new:
            class_list_new[this_label] = np.concatenate((class_list_new[this_label], embs), axis=0)
        else:
            class_list_new[this_label] = embs

In [17]:
# Total number of embedding rows in the merged dict.
count = sum(len(embs) for embs in class_list_new.values())
count

1276888

In [18]:
# Collect every label whose embedding matrix contains a NaN, report it, and
# remove it from the merged dict.
nanlist = [label for label, embs in class_list_new.items() if np.isnan(embs).any()]
for label in nanlist:
    print(label)
for label in nanlist:
    class_list_new.pop(label)

In [19]:
count

1276888

In [20]:
len(class_list_new)

7323

In [21]:
import pickle
# Pickle the merged {label: embeddings} dict into the component directory.
out = '/'.join((component_dir, PLDA_DATA_NAME))
with open(out, 'wb') as dump_file:
    pickle.dump(class_list_new, dump_file)

In [20]:
# class_list_new = {}
# for i in class_list:
#     class_list_new[i[:-1]] = class_list[i]

# PLDA_FIN

In [21]:
import os
import sys
sys.path.append('./train')

import numpy as np
import torch


# import vox_model_bank
from train import train_model_new
from train.read_data import *
from train.my_dataloader import *
# from sklearn.metrics import roc_curve

In [86]:
from kaldi_plda import *
from kaldi_lda import *
import numpy as np

In [129]:
import pickle
# Load the pickled {label: embeddings} dict used to train LDA/PLDA.
# NOTE(review): this reads from the new_testbench directory, not from
# component_dir where the extraction section writes PLDA_DATA_NAME -- confirm
# that this is the intended file.
plda_data_path = '/'.join(('/Lun2/rzz/kaldi-master/egs/zhiyong/sre19/new_testbench', PLDA_DATA_NAME))
with open(plda_data_path, 'rb') as src:
    class_list_new = pickle.load(src)

In [130]:
# Compute the global mean over all utterances (each class mean weighted by its
# number of rows); the subtraction itself happens in the next cell.
# The loop no longer goes through enumerate, whose unused index overwrote the
# notebook-level `count`.
global_mean = np.zeros(512)
num_utt = 0
for i in class_list_new:
    num_utt += class_list_new[i].shape[0]
    global_mean += class_list_new[i].shape[0] * np.mean(class_list_new[i], axis=0)

global_mean = (1.0 / num_utt) * global_mean
print('Norm of mean:', np.linalg.norm(global_mean))

Norm of mean: 1.9705881360946418


In [131]:
# Centre every class matrix on the global mean (values are replaced in place;
# the key set is untouched, so iterating over items() is safe).
for label, embs in class_list_new.items():
    class_list_new[label] = embs - global_mean

In [26]:
# # normalize to sqrt(dim)
# for i in class_list_new:
#     scale = np.sqrt(512) / np.linalg.norm(class_list_new[i], axis=1, keepdims=True)
#     class_list_new[i] = scale * class_list_new[i]

In [132]:
# Accumulate LDA statistics one class at a time, then report the second
# element returned by GetGlobalMean().
lda = LDA(lda_dim=PLDA_DIM, ivector_dim=512)
for class_embs in class_list_new.values():
    lda.AccStats(class_embs)
print('lda norm of global mean:', lda.GetGlobalMean()[1])

lda norm of global mean: 9.641873606430675e-08


In [133]:
transform = lda.ComputeLdaTransform()

the input data has norm of mean 9.641873606430675e-08
[7.01190490e+00 6.75681662e+00 6.53853053e+00 6.25568993e+00
 6.11248365e+00 5.95853316e+00 5.76067349e+00 5.64741497e+00
 5.57487744e+00 5.41990231e+00 5.38111534e+00 5.31902969e+00
 5.23789065e+00 5.19969617e+00 5.15591191e+00 5.06367837e+00
 4.95002558e+00 4.87409618e+00 4.84840474e+00 4.74871468e+00
 4.71036534e+00 4.62632340e+00 4.59444607e+00 4.53859824e+00
 4.50825004e+00 4.44792223e+00 4.42513736e+00 4.34977219e+00
 4.28159505e+00 4.25209140e+00 4.19223498e+00 4.16862494e+00
 4.06981441e+00 4.06355494e+00 4.03968798e+00 3.96515890e+00
 3.92289551e+00 3.89805072e+00 3.86241858e+00 3.79790583e+00
 3.72304290e+00 3.67162826e+00 3.64237800e+00 3.57782648e+00
 3.55594916e+00 3.51338202e+00 3.48159699e+00 3.44671244e+00
 3.38799819e+00 3.37222889e+00 3.31932983e+00 3.26479589e+00
 3.23583721e+00 3.18890552e+00 3.16877421e+00 3.13774669e+00
 3.09139334e+00 3.04371133e+00 3.00723616e+00 2.97468080e+00
 2.93475707e+00 2.91202466e+00 

In [29]:
# # normalize to sqrt(dim)
# for i in class_list_new:
#     scale = np.sqrt(512) / np.linalg.norm(class_list_new[i], axis=1, keepdims=True)
#     class_list_new[i] = scale * class_list_new[i]

In [134]:
# Apply the LDA transform to every class matrix (rows are utterances).
for label, embs in class_list_new.items():
    class_list_new[label] = embs.dot(transform.T)

In [135]:
# Length-normalise every row so that its norm equals sqrt(PLDA_DIM).
for label, embs in class_list_new.items():
    scale = np.sqrt(PLDA_DIM) / np.linalg.norm(embs, axis=1, keepdims=True)
    class_list_new[label] = scale * embs

In [136]:
# Feed every class matrix into the PLDA sufficient statistics.
plda_stats = PldaStats(PLDA_DIM)
for class_embs in class_list_new.values():
    plda_stats.add_samples(class_embs)

In [137]:
plda_stats.sort()
plda_stats.is_sorted()

True

In [138]:
# test_fin_prior
plda_estimator = PldaEstimation(plda_stats)
plda_paras = plda_estimator.estimate(iteration=5)

1 5
nllr_x: 411.3536651606943
nllr_y: -371.394730264205
normalized_nllr: 39.95893489648931
nllr_m: 412.13621538628524
nllr_m_2: 412.13621538628604
part1_residual -455.29228631886446
part2_mean -412.13621538628524
normlized_obj -455.04478465275184
2 5
nllr_x: 240.15276805056112
nllr_y: -458.94205476522495
normalized_nllr: -218.7892867146638
nllr_m: 246.5813414941023
nllr_m_2: 246.5813414941021
part1_residual -437.3886274503415
part2_mean -246.5813414941023
normlized_obj -436.294340594284
3 5
nllr_x: 237.46553834505684
nllr_y: -458.8741951366892
normalized_nllr: -221.40865679163232
nllr_m: 245.48345787472775
nllr_m_2: 245.48345787472786
part1_residual -437.3910096499305
part2_mean -245.48345787472775
normlized_obj -436.2904127286267
4 5
nllr_x: 237.20065986248494
nllr_y: -458.7576755590343
normalized_nllr: -221.55701569654934
nllr_m: 245.46150954196784
nllr_m_2: 245.4615095419679
part1_residual -437.3910074823672
part2_mean -245.46150954196784
normlized_obj -436.2902846989926
5 5
nllr_x:

In [139]:
import pickle
# Pickle the estimated PLDA parameters into the component directory.
out = '/'.join((component_dir, PLDA_PARA_NAME))
with open(out, 'wb') as dump_file:
    pickle.dump(plda_paras, dump_file)

# Load PLDA model

In [140]:
import pickle
# Read the PLDA parameters back from the component directory.
plda_para_path = '/'.join((component_dir, PLDA_PARA_NAME))
with open(plda_para_path, 'rb') as src:
    plda_paras = pickle.load(src)

In [141]:
plda = PLDA(plda_paras[0], plda_paras[1], plda_paras[2])

# Unsupervised adaptation

In [179]:
import pickle
# Load the pickled in-domain train embeddings ({label: embeddings}).
# NOTE(review): this rebinds `train_data`, which the extraction sections use
# for the dataset object.
train_data_path = '/'.join((component_dir, TRAIN_DATA_NAME))
with open(train_data_path, 'rb') as src:
    train_data = pickle.load(src)

In [183]:
# Stack every label's embeddings into one (n_rows, 512) matrix with a single
# concatenate instead of re-allocating the growing array once per label.
# The empty float64 seed is kept as the first piece so the result has the same
# dtype promotion as before and is a valid (0, 512) array when train_data is empty.
unlabeled_list = np.concatenate([np.zeros([0, 512])] + [train_data[i] for i in train_data], axis=0)

In [185]:
major_mean = np.mean(unlabeled_list, axis=0)

In [31]:
# import pickle
# out = component_dir+'/major_mean'
# with open(out, 'wb') as handle:
#     pickle.dump(major_mean, handle)

In [186]:
unlabeled_list = unlabeled_list - major_mean

In [187]:
unlabeled_list = unlabeled_list.dot(transform.T)

In [188]:
# Length-normalise every row so that its norm equals sqrt(PLDA_DIM).
row_norms = np.linalg.norm(unlabeled_list, axis=1, keepdims=True)
scale = np.sqrt(PLDA_DIM) / row_norms
unlabeled_list = scale * unlabeled_list

In [190]:
adaptor = PldaUnsupervisedAdaptor(dim=PLDA_DIM)

In [191]:
# Hand the adaptor one row (one utterance vector) at a time.
for unlabeled_vec in unlabeled_list:
    adaptor.add_stats(unlabeled_vec)

In [192]:
new_plda_paras = adaptor.update_plda(plda_paras[0], plda_paras[1], plda_paras[2])

In [193]:
import pickle
# Pickle the adapted PLDA parameters into the component directory.
out = component_dir + '/new_plda_paras'
with open(out, 'wb') as dump_file:
    pickle.dump(new_plda_paras, dump_file)

In [194]:
plda = PLDA(new_plda_paras[0], new_plda_paras[1], new_plda_paras[2])

# Enroll models

In [202]:
import pickle
# Load the pickled enrollment embeddings ({utterance id: embeddings}).
enr_data_path = '/'.join((component_dir, ENR_DATA_NAME))
with open(enr_data_path, 'rb') as src:
    enr_data = pickle.load(src)

In [203]:
# Build, for every enrollment model, the matrix of its utterance embeddings
# (enr_list) and its number of enrollment utterances (num_utt).
# Each line after the header is: model label followed by its utterance ids,
# separated by single spaces.
enr_list = {}
num_utt = {}
with open('/Lun0/zhiyong/SdSV_2020_deepmine/task2_enrollment/docs/model_enrollment.txt', 'r') as f:
    for count, line in enumerate(f):
        if count == 0:
            # Header line.
            continue
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would truncate the final utterance id when the file
        # does not end with a newline.
        info = line.rstrip('\n').split(' ')
        model_label = info[0]
        utt_ids = info[1:]
        num_utt[model_label] = len(utt_ids)
        rows = [enr_data[utt_id] for utt_id in utt_ids]
        # A label that already has rows (repeated line) keeps them in front.
        if model_label in enr_list:
            rows.insert(0, enr_list[model_label])
        if rows:
            # One concatenate per model instead of one np.append per utterance.
            enr_list[model_label] = np.concatenate(rows, axis=0)

# Test data

In [204]:
import pickle
# Load the pickled evaluation embeddings ({utterance id: embeddings}).
evl_data_path = '/'.join((component_dir, EVL_DATA_NAME))
with open(evl_data_path, 'rb') as src:
    evl_data = pickle.load(src)

# Scoring

In [150]:
trail_path = '/Lun0/zhiyong/SdSV_2020_deepmine/task2_enrollment/docs/trials.txt'
score_out_path = component_dir+'/'+SCORING_PLDA_NAME

In [152]:
# Turn each enrollment model into one PLDA-space vector: utterance mean ->
# subtract global_mean -> LDA projection -> scale to norm sqrt(PLDA_DIM) ->
# plda.transform_ivector with the model's utterance count.
for i in enr_list:
    # This cell overwrites enr_list in place. Freshly built entries are 2-D
    # (one row per utterance); anything else means a scoring section has
    # already processed this dict, and processing it again would silently
    # produce wrong vectors.
    if np.ndim(enr_list[i]) != 2:
        raise RuntimeError('enr_list[%r] is not a raw 2-D embedding matrix; rebuild enr_list before scoring' % (i,))
    enr_list[i] = np.mean(enr_list[i], axis=0).squeeze()
    enr_list[i] = enr_list[i] - global_mean
    enr_list[i] = transform.dot(enr_list[i])
    enr_list[i] = (np.sqrt(PLDA_DIM) / np.linalg.norm(enr_list[i])) * enr_list[i]
    num = num_utt[i]
    enr_list[i] = plda.transform_ivector(enr_list[i], num)

In [153]:
# Turn each evaluation embedding into a PLDA-space vector: squeeze ->
# subtract global_mean -> LDA projection -> scale to norm sqrt(PLDA_DIM) ->
# plda.transform_ivector with an utterance count of 1.
for i in evl_data:
    # This cell overwrites evl_data in place. Freshly loaded entries are 2-D
    # (rows of embeddings); anything else means a scoring section has already
    # processed this dict, and processing it again would silently produce
    # wrong vectors.
    if np.ndim(evl_data[i]) != 2:
        raise RuntimeError('evl_data[%r] is not a raw 2-D embedding matrix; reload evl_data before scoring' % (i,))
    evl_data[i] = evl_data[i].squeeze()
    evl_data[i] = evl_data[i] - global_mean
    evl_data[i] = transform.dot(evl_data[i])
    evl_data[i] = (np.sqrt(PLDA_DIM) / np.linalg.norm(evl_data[i])) * evl_data[i]
    evl_data[i] = plda.transform_ivector(evl_data[i], 1)

In [154]:
# Score every trial with the PLDA log-likelihood ratio and write one score per
# line. The first line of the trials file is a header: it is echoed, not scored.
with open(score_out_path, 'w') as of, open(trail_path, 'r') as f:
    for count, line in enumerate(f):
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would truncate the evaluation id of the final trial
        # when the file does not end with a newline.
        line = line.rstrip('\n')
        if count == 0:
            print(line)
            continue
        # Trial line: "<model id> <evaluation file id>"; split once.
        fields = line.split(' ')
        enroll_key = fields[0]
        enroll_emb = enr_list[enroll_key].squeeze()
        num = num_utt[enroll_key]
        test_emb = evl_data[fields[1]].squeeze()

        score = plda.log_likelihood_ratio(enroll_emb, num, test_emb)

        of.write(str(score)+'\n')

        if (count+1) % 5000 == 0:
            print(count+1)

model-id evaluation-file-id
5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000
60000
65000
70000
75000
80000
85000
90000
95000
100000
105000
110000
115000
120000
125000
130000
135000
140000
145000
150000
155000
160000
165000
170000
175000
180000
185000
190000
195000
200000
205000
210000
215000
220000
225000
230000
235000
240000
245000
250000
255000
260000
265000
270000
275000
280000
285000
290000
295000
300000
305000
310000
315000
320000
325000
330000
335000
340000
345000
350000
355000
360000
365000
370000
375000
380000
385000
390000
395000
400000
405000
410000
415000
420000
425000
430000
435000
440000
445000
450000
455000
460000
465000
470000
475000
480000
485000
490000
495000
500000
505000
510000
515000
520000
525000
530000
535000
540000
545000
550000
555000
560000
565000
570000
575000
580000
585000
590000
595000
600000
605000
610000
615000
620000
625000
630000
635000
640000
645000
650000
655000
660000
665000
670000
675000
680000
685000
690000
695000
700000
705000
71000

5245000
5250000
5255000
5260000
5265000
5270000
5275000
5280000
5285000
5290000
5295000
5300000
5305000
5310000
5315000
5320000
5325000
5330000
5335000
5340000
5345000
5350000
5355000
5360000
5365000
5370000
5375000
5380000
5385000
5390000
5395000
5400000
5405000
5410000
5415000
5420000
5425000
5430000
5435000
5440000
5445000
5450000
5455000
5460000
5465000
5470000
5475000
5480000
5485000
5490000
5495000
5500000
5505000
5510000
5515000
5520000
5525000
5530000
5535000
5540000
5545000
5550000
5555000
5560000
5565000
5570000
5575000
5580000
5585000
5590000
5595000
5600000
5605000
5610000
5615000
5620000
5625000
5630000
5635000
5640000
5645000
5650000
5655000
5660000
5665000
5670000
5675000
5680000
5685000
5690000
5695000
5700000
5705000
5710000
5715000
5720000
5725000
5730000
5735000
5740000
5745000
5750000
5755000
5760000
5765000
5770000
5775000
5780000
5785000
5790000
5795000
5800000
5805000
5810000
5815000
5820000
5825000
5830000
5835000
5840000
5845000
5850000
5855000
5860000
5865000


10325000
10330000
10335000
10340000
10345000
10350000
10355000
10360000
10365000
10370000
10375000
10380000
10385000
10390000
10395000
10400000
10405000
10410000
10415000
10420000
10425000
10430000
10435000
10440000
10445000
10450000
10455000
10460000
10465000
10470000
10475000
10480000
10485000
10490000
10495000
10500000
10505000
10510000
10515000
10520000
10525000
10530000
10535000
10540000
10545000
10550000
10555000
10560000
10565000
10570000
10575000
10580000
10585000
10590000
10595000
10600000
10605000
10610000
10615000
10620000
10625000
10630000
10635000
10640000
10645000
10650000
10655000
10660000
10665000
10670000
10675000
10680000
10685000
10690000
10695000
10700000
10705000
10710000
10715000
10720000
10725000
10730000
10735000
10740000
10745000
10750000
10755000
10760000
10765000
10770000
10775000
10780000
10785000
10790000
10795000
10800000
10805000
10810000
10815000
10820000
10825000
10830000
10835000
10840000
10845000
10850000
10855000
10860000
10865000
10870000
10875000
1

In [156]:
count

13198024

# Scoring adapt plda

In [198]:
trail_path = '/Lun0/zhiyong/SdSV_2020_deepmine/task2_enrollment/docs/trials.txt'
score_out_path = component_dir+'/'+SCORING_PLDA_NAME+'adapt'

In [199]:
# Turn each enrollment model into one vector for the adapted PLDA: utterance
# mean -> subtract major_mean -> LDA projection -> scale to norm
# sqrt(PLDA_DIM) -> plda.transform_ivector with the model's utterance count.
for i in enr_list:
    # This cell overwrites enr_list in place. Freshly built entries are 2-D
    # (one row per utterance); anything else means a scoring section has
    # already processed this dict, and processing it again would silently
    # produce wrong vectors.
    if np.ndim(enr_list[i]) != 2:
        raise RuntimeError('enr_list[%r] is not a raw 2-D embedding matrix; rebuild enr_list before scoring' % (i,))
    enr_list[i] = np.mean(enr_list[i], axis=0).squeeze()
    enr_list[i] = enr_list[i] - major_mean
    enr_list[i] = transform.dot(enr_list[i])
    enr_list[i] = (np.sqrt(PLDA_DIM) / np.linalg.norm(enr_list[i])) * enr_list[i]
    num = num_utt[i]
    enr_list[i] = plda.transform_ivector(enr_list[i], num)

In [200]:
# Turn each evaluation embedding into a vector for the adapted PLDA: squeeze ->
# subtract major_mean -> LDA projection -> scale to norm sqrt(PLDA_DIM) ->
# plda.transform_ivector with an utterance count of 1.
for i in evl_data:
    # This cell overwrites evl_data in place. Freshly loaded entries are 2-D
    # (rows of embeddings); anything else means a scoring section has already
    # processed this dict, and processing it again would silently produce
    # wrong vectors.
    if np.ndim(evl_data[i]) != 2:
        raise RuntimeError('evl_data[%r] is not a raw 2-D embedding matrix; reload evl_data before scoring' % (i,))
    evl_data[i] = evl_data[i].squeeze()
    evl_data[i] = evl_data[i] - major_mean
    evl_data[i] = transform.dot(evl_data[i])
    evl_data[i] = (np.sqrt(PLDA_DIM) / np.linalg.norm(evl_data[i])) * evl_data[i]
    evl_data[i] = plda.transform_ivector(evl_data[i], 1)

In [201]:
# Score every trial with the PLDA log-likelihood ratio and write one score per
# line. The first line of the trials file is a header: it is echoed, not scored.
with open(score_out_path, 'w') as of, open(trail_path, 'r') as f:
    for count, line in enumerate(f):
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would truncate the evaluation id of the final trial
        # when the file does not end with a newline.
        line = line.rstrip('\n')
        if count == 0:
            print(line)
            continue
        # Trial line: "<model id> <evaluation file id>"; split once.
        fields = line.split(' ')
        enroll_key = fields[0]
        enroll_emb = enr_list[enroll_key].squeeze()
        num = num_utt[enroll_key]
        test_emb = evl_data[fields[1]].squeeze()

        score = plda.log_likelihood_ratio(enroll_emb, num, test_emb)

        of.write(str(score)+'\n')

        # Progress marker every 100000 trials.
        if (count+1) % 100000 == 0:
            print((count+1) // 100000)

model-id evaluation-file-id
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
10.0
11.0
12.0
13.0
14.0
15.0
16.0
17.0
18.0
19.0
20.0
21.0
22.0
23.0
24.0
25.0
26.0
27.0
28.0
29.0
30.0
31.0
32.0
33.0
34.0
35.0
36.0
37.0
38.0
39.0
40.0
41.0
42.0
43.0
44.0
45.0
46.0
47.0
48.0
49.0
50.0
51.0
52.0
53.0
54.0
55.0
56.0
57.0
58.0
59.0
60.0
61.0
62.0
63.0
64.0
65.0
66.0
67.0
68.0
69.0
70.0
71.0
72.0
73.0
74.0
75.0
76.0
77.0
78.0
79.0
80.0
81.0
82.0
83.0
84.0
85.0
86.0
87.0
88.0
89.0
90.0
91.0
92.0
93.0
94.0
95.0
96.0
97.0
98.0
99.0
100.0
101.0
102.0
103.0
104.0
105.0
106.0
107.0
108.0
109.0
110.0
111.0
112.0
113.0
114.0
115.0
116.0
117.0
118.0
119.0
120.0
121.0
122.0
123.0
124.0
125.0
126.0
127.0
128.0
129.0
130.0
131.0


In [156]:
# Display the current value of `count`.
# NOTE(review): presumably the counter left behind by an earlier enumerate()
# loop; the cell relies on kernel state from another cell — confirm which one.
count

13198024

# Cosine Scoring

In [205]:
# Input trial list and output score file for the cosine-scoring system.
# NOTE(review): absolute, machine-specific path; consider a configurable data dir.
trail_path = '/Lun0/zhiyong/SdSV_2020_deepmine/task2_enrollment/docs/trials.txt'
score_out_path = '/'.join([component_dir, SCORING_COSINE_NAME])

In [206]:
# Collapse each enrollment entry into a single unit-length vector for cosine
# scoring (assumes axis 0 of a raw entry indexes utterances — TODO confirm).
# NOTE(review): enr_list is overwritten in place; if the PLDA section above was
# run first, reload the raw embeddings before running this cell.
for i in enr_list:
    emb = np.asarray(enr_list[i])
    # Average only while the entry is still a stack of embeddings. Calling
    # np.mean(axis=0) on an already-averaged 1-D vector collapses it to a
    # scalar, so re-running this cell used to destroy the embeddings silently.
    if emb.ndim > 1:
        emb = np.mean(emb, axis=0)
    emb = emb.squeeze()
    enr_list[i] = (1.0 / np.linalg.norm(emb)) * emb

In [207]:
# Squeeze every evaluation embedding and scale it to unit L2 norm so that a
# plain dot product with a unit-length enrollment vector is a cosine similarity.
for utt_id in evl_data:
    flat = evl_data[utt_id].squeeze()
    evl_data[utt_id] = (1.0 / np.linalg.norm(flat)) * flat

In [210]:
# Score every trial with the dot product of the enrollment and test embeddings
# and write one score per line to score_out_path, in trial-list order.
# The first line of the trial file is a header; it is printed and skipped.
with open(score_out_path, 'w') as of, open(trail_path, 'r') as f:
    for count, line in enumerate(f):
        # Strip only the line terminator. Slicing with [:-1] removed the last
        # character unconditionally, which truncated the evaluation-file id
        # whenever the final line had no trailing newline.
        line = line.rstrip('\r\n')
        if count == 0:
            print(line)
            continue

        # Split once: field 0 is the model id, field 1 the evaluation-file id.
        fields = line.split(' ')
        model_id, test_id = fields[0], fields[1]

        enroll_emb = enr_list[model_id].squeeze()
        test_emb = evl_data[test_id].squeeze()

        cosine = np.dot(enroll_emb, test_emb)
        of.write(str(cosine)+'\n')

        # Progress indicator: one line per 100000 processed lines.
        if (count+1) % 100000 == 0:
            print((count+1) // 100000)

model-id evaluation-file-id
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131


# Scoring: evaluation metrics and calibration

In [52]:
from my_scorer import scoring
from calibrate_scores import calibrating
from apply_calibration import applying

In [53]:
# Evaluate the PLDA scores against the trial key, then calibrate and re-evaluate.
# NOTE(review): every calibration cell in this notebook uses the same
# calib.pth path, so each run presumably overwrites the previous model file.
score_file = '/'.join([component_dir, SCORING_PLDA_NAME])
key_file = '/Lun0/zhiyong/dataset/vox1_kaldi_test/trials'
calib_score_file = '{}_calib'.format(score_file)
linear_model_pth = '/'.join([component_dir, 'calib.pth'])

# Metrics of the raw scores (the recorded output is an EER / min_C / act_C table).
_ = scoring(score_file, key_file)

# Fit the linear calibration model (the meaning of the constant 50 is defined
# in calibrate_scores — TODO confirm), then apply it to the same score file.
calibrating(linear_model_pth, 50, key_file, [score_file])
applying(linear_model_pth, [score_file], calib_score_file)

# Metrics of the calibrated scores.
_ = scoring(calib_score_file, key_file)


Set	EER[%]	min_C	act_C
OUT	01.98	0.216	0.303
Starting point for CLLR is 0.320928
Converged linear model with loss 0.08190182800917162

Set	EER[%]	min_C	act_C
OUT	01.98	0.216	0.258


In [54]:
# Evaluate the cosine scores against the trial key, then calibrate and re-evaluate.
# NOTE(review): every calibration cell in this notebook uses the same
# calib.pth path, so each run presumably overwrites the previous model file.
score_file = '/'.join([component_dir, SCORING_COSINE_NAME])
key_file = '/Lun0/zhiyong/dataset/vox1_kaldi_test/trials'
calib_score_file = '{}_calib'.format(score_file)
linear_model_pth = '/'.join([component_dir, 'calib.pth'])

# Metrics of the raw scores (the recorded output is an EER / min_C / act_C table).
_ = scoring(score_file, key_file)

# Fit the linear calibration model (the meaning of the constant 50 is defined
# in calibrate_scores — TODO confirm), then apply it to the same score file.
calibrating(linear_model_pth, 50, key_file, [score_file])
applying(linear_model_pth, [score_file], calib_score_file)

# Metrics of the calibrated scores.
_ = scoring(calib_score_file, key_file)


Set	EER[%]	min_C	act_C
OUT	02.18	0.231	1.000
Starting point for CLLR is 0.842448
Converged linear model with loss 0.08916190600896338

Set	EER[%]	min_C	act_C
OUT	02.18	0.231	0.260


# Score normalization

In [58]:
from score_norm_1 import score_norm

In [59]:
# Normalize the PLDA scores; the result is written next to the input with a
# `_norm` suffix.
# NOTE(review): the recorded output contains numpy warning fragments
# (`ret / rcount`), which look like a mean over an empty slice — check the
# normalized scores for NaNs.
ori_score = '/'.join([component_dir, SCORING_PLDA_NAME])
norm_score = '{}_norm'.format(ori_score)
score_norm(ori_score, norm_score)

  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


In [60]:
# Evaluate the normalized PLDA scores, then calibrate and re-evaluate.
# NOTE(review): every calibration cell in this notebook uses the same
# calib.pth path, so each run presumably overwrites the previous model file.
score_file = '/'.join([component_dir, SCORING_PLDA_NAME + '_norm'])
key_file = '/Lun0/zhiyong/dataset/vox1_kaldi_test/trials'
calib_score_file = '{}_calib'.format(score_file)
linear_model_pth = '/'.join([component_dir, 'calib.pth'])

# Metrics of the normalized, uncalibrated scores.
_ = scoring(score_file, key_file)

# Fit the linear calibration model (the meaning of the constant 50 is defined
# in calibrate_scores — TODO confirm), then apply it to the same score file.
calibrating(linear_model_pth, 50, key_file, [score_file])
applying(linear_model_pth, [score_file], calib_score_file)

# Metrics of the calibrated scores.
_ = scoring(calib_score_file, key_file)


Set	EER[%]	min_C	act_C
OUT	01.88	0.312	1.000
Starting point for CLLR is 0.538934
Converged linear model with loss 0.07954009941343457

Set	EER[%]	min_C	act_C
OUT	01.88	0.312	0.334


In [61]:
# Normalize the cosine scores; the result is written next to the input with a
# `_norm` suffix.
# NOTE(review): the recorded output contains numpy warning fragments
# (`ret / rcount`), which look like a mean over an empty slice — check the
# normalized scores for NaNs.
ori_score = '/'.join([component_dir, SCORING_COSINE_NAME])
norm_score = '{}_norm'.format(ori_score)
score_norm(ori_score, norm_score)

  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


In [62]:
# Evaluate the normalized cosine scores, then calibrate and re-evaluate.
# NOTE(review): every calibration cell in this notebook uses the same
# calib.pth path, so each run presumably overwrites the previous model file.
score_file = '/'.join([component_dir, SCORING_COSINE_NAME + '_norm'])
key_file = '/Lun0/zhiyong/dataset/vox1_kaldi_test/trials'
calib_score_file = '{}_calib'.format(score_file)
linear_model_pth = '/'.join([component_dir, 'calib.pth'])

# Metrics of the normalized, uncalibrated scores.
_ = scoring(score_file, key_file)

# Fit the linear calibration model (the meaning of the constant 50 is defined
# in calibrate_scores — TODO confirm), then apply it to the same score file.
calibrating(linear_model_pth, 50, key_file, [score_file])
applying(linear_model_pth, [score_file], calib_score_file)

# Metrics of the calibrated scores.
_ = scoring(calib_score_file, key_file)


Set	EER[%]	min_C	act_C
OUT	01.81	0.311	1.000
Starting point for CLLR is 0.536303
Converged linear model with loss 0.07717334626909308

Set	EER[%]	min_C	act_C
OUT	01.81	0.311	0.331


# Ensemble (score fusion)

In [63]:
# Fuse the PLDA and cosine systems: fit one linear model on both score files,
# write the fused scores to `fuse`, and evaluate them against the trial key.
# NOTE(review): the model is stored at the same calib.pth path used by the
# single-system calibration cells, so it presumably replaces their model file.
score_file_1 = '/'.join([component_dir, SCORING_PLDA_NAME])
score_file_2 = '/'.join([component_dir, SCORING_COSINE_NAME])
key_file = '/Lun0/zhiyong/dataset/vox1_kaldi_test/trials'
calib_score_file = '/'.join([component_dir, 'fuse'])
linear_model_pth = '/'.join([component_dir, 'calib.pth'])

# The individual systems are not scored here; only the fused output is.
calibrating(linear_model_pth, 50, key_file, [score_file_1, score_file_2])
applying(linear_model_pth, [score_file_1, score_file_2], calib_score_file)
_ = scoring(calib_score_file, key_file)

Starting point for CLLR is 0.581688
Converged linear model with loss 0.074283602014203

Set	EER[%]	min_C	act_C
OUT	01.74	0.204	0.221


In [65]:
# Fuse the score-normalized PLDA and cosine systems: fit one linear model on
# both score files, write the fused scores to `fuse_norm`, and evaluate them.
# NOTE(review): the model is stored at the same calib.pth path used by the
# other calibration cells, so it presumably replaces their model file.
score_file_1 = '/'.join([component_dir, SCORING_PLDA_NAME + '_norm'])
score_file_2 = '/'.join([component_dir, SCORING_COSINE_NAME + '_norm'])
key_file = '/Lun0/zhiyong/dataset/vox1_kaldi_test/trials'
calib_score_file = '/'.join([component_dir, 'fuse_norm'])
linear_model_pth = '/'.join([component_dir, 'calib.pth'])

# The individual systems are not scored here; only the fused output is.
calibrating(linear_model_pth, 50, key_file, [score_file_1, score_file_2])
applying(linear_model_pth, [score_file_1, score_file_2], calib_score_file)
_ = scoring(calib_score_file, key_file)

Starting point for CLLR is 0.537618
Converged linear model with loss 0.0694378887070766

Set	EER[%]	min_C	act_C
OUT	01.61	0.266	0.291
