In [1]:
import os
import shutil
import subprocess
import warnings

import numpy as np
import sidekit
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, precision_recall_curve, f1_score, precision_score, recall_score
from tqdm import tqdm_notebook

  from ._conv import register_converters as _register_converters


Import theano


ERROR (theano.gpuarray): pygpu was configured but could not be imported or is too old (version 0.7 or higher required)
NoneType: None


In [2]:
def convert(filename, save_path, duration=None):
    """Convert an audio file to a band-limited 8 kHz, 16-bit, mono WAV with sox.

    The result is written into ``save_path`` under the input's base name
    truncated at its first dot, with a ``.wav`` extension. sox is started
    from an argument list (no shell), so paths containing spaces or shell
    metacharacters are passed to it verbatim.

    Args:
        filename: Path of the audio file to convert.
        save_path: Directory that receives the converted file.
        duration: Optional value for sox's ``trim 0 <duration>`` effect;
            when None the whole file is converted.

    Returns:
        None. A warning is emitted when sox exits with a non-zero status.

    Raises:
        OSError: If the sox executable cannot be started.
    """
    # Keep only the part before the first dot: get_ivectors_from_wav derives
    # the show names from the converted files with the same rule.
    save_filename = os.path.basename(filename).split('.')[0] + '.wav'
    tmp_filename = os.path.join(save_path, save_filename)

    # convert to wav (8khz, 16 bit, 1 channel), band-pass 300-3400 Hz
    command = ["sox", filename, "-R", "--rate", "8000", "-b", "16", "-c", "1",
               tmp_filename, "lowpass", "3400", "highpass", "300"]
    if duration is not None:
        command += ["trim", "0", str(duration)]

    result = subprocess.run(command)
    if result.returncode != 0:
        # Conversion stays best-effort, but a failure is no longer silent.
        warnings.warn("sox exited with status {} while converting {}".format(result.returncode,
                                                                             filename))
        
def get_feature_server(feature_filename_structure):
    """Create the sidekit FeaturesServer used to read stored features.

    Args:
        feature_filename_structure: Filename pattern of the feature files,
            forwarded unchanged to ``sidekit.FeaturesServer``.

    Returns:
        A ``sidekit.FeaturesServer`` set up to load the "cep" dataset with
        the mask '[0-12]', "cmvn" normalisation, RASTA, delta and
        double-delta enabled, keeping all features.
    """
    server_options = dict(
        feature_filename_structure=feature_filename_structure,
        dataset_list='["cep"]',
        feat_norm="cmvn",
        delta=True,
        double_delta=True,
        rasta=True,
        mask='[0-12]',
        keep_all_features=True,
    )
    return sidekit.FeaturesServer(**server_options)


def get_idmap(leftids, rightids):
    """Build a sidekit IdMap from two parallel identifier sequences.

    Args:
        leftids: Values stored as ``IdMap.leftids``.
        rightids: Values stored as ``IdMap.rightids``.

    Returns:
        A ``sidekit.IdMap`` whose ``start`` and ``stop`` are freshly allocated
        object arrays with one slot per entry of ``leftids``.
    """
    id_map = sidekit.IdMap()
    id_map.leftids, id_map.rightids = leftids, rightids

    # start/stop are only allocated here, never filled with boundaries.
    session_count = len(leftids)
    for boundary in ("start", "stop"):
        setattr(id_map, boundary, np.empty(session_count, dtype="|O"))

    return id_map

def save_stat(idmap, fs, ubm, distrib_num, thread_num, save_path):
    """Accumulate statistics for every segment of ``idmap`` and write them to disk.

    Args:
        idmap: Id map used to initialise the ``sidekit.StatServer``.
        fs: Feature server passed to ``accumulate_stat``.
        ubm: Mixture model passed to ``accumulate_stat``.
        distrib_num: Value for the StatServer's ``distrib_nb`` argument.
        thread_num: Number of threads used while accumulating.
        save_path: Destination handed to ``StatServer.write``.
    """
    # feature_size is fixed at 39 -- presumably 13 coefficients plus delta and
    # double-delta as configured in get_feature_server; TODO confirm.
    stat_server = sidekit.StatServer(idmap, distrib_nb=distrib_num, feature_size=39)

    # Process every segment known to the server.
    segment_count = stat_server.segset.shape[0]
    stat_server.accumulate_stat(
        ubm=ubm,
        feature_server=fs,
        seg_indices=range(segment_count),
        num_thread=thread_num,
    )

    stat_server.write(save_path)
    
def extract_ivectors(factor_analyser, ubm, stat_path, num_thread, batch_size):
    """Run ``factor_analyser.extract_ivectors`` on a stored statistics file.

    Args:
        factor_analyser: Object exposing ``extract_ivectors`` (a
            ``sidekit.FactorAnalyser`` in this notebook).
        ubm: Mixture model passed as the first positional argument.
        stat_path: Path of the statistics file, passed second.
        num_thread: Number of threads to use.
        batch_size: Batch size forwarded to the extractor.

    Returns:
        Whatever ``factor_analyser.extract_ivectors`` returns when called with
        an empty prefix and ``uncertainty=False``.
    """
    extraction_options = {
        "prefix": '',
        "batch_size": batch_size,
        "uncertainty": False,
        "num_thread": num_thread,
    }
    return factor_analyser.extract_ivectors(ubm, stat_path, **extraction_options)

In [3]:
# Restriction on file names: the label is derived from the file name alone. English audio files in
# wav_folder_path must start with 'english_' (the code checks for the prefix 'english'); files in any
# other language may have any name that does not start with that prefix.

def _half_cpu_count():
    """Return half of the available CPU cores, but never fewer than one."""
    # os.cpu_count() may return None, and 1 // 2 == 0 on a single-core host.
    return max(1, (os.cpu_count() or 1) // 2)


def get_ivectors_from_wav(wav_folder_path, ubm_path='i_vectors/ubm/ubm_512.h5',
                                           stat_ubm_path='i_vectors/statisics_test.h5',
                                           t_matrix_path='i_vectors/total_variability_train.h5'):
    """Convert a folder of audio files and extract one i-vector per file.

    Steps: convert every regular file in ``wav_folder_path`` to WAV with
    ``convert``, extract features with sidekit, accumulate statistics against
    the UBM, and extract i-vectors with the stored total-variability model.

    Side effects: 'i_vectors/test/' is deleted and recreated,
    'i_vectors/feat/test/' is deleted before feature extraction, and the
    statistics are written to ``stat_ubm_path``.

    Args:
        wav_folder_path: Folder with the audio files. A file is labelled
            English when its name starts with 'english', otherwise "other".
        ubm_path: File loaded with ``sidekit.Mixture``.
        stat_ubm_path: File the accumulated statistics are written to and
            read back from.
        t_matrix_path: File loaded with ``sidekit.FactorAnalyser``.

    Returns:
        Tuple ``(X, y)``: ``X`` is ``stat1`` of the object returned by the
        i-vector extraction (presumably one row per file, in the same order
        as the labels -- TODO confirm against sidekit); ``y`` is a numpy
        array holding 0 for English files and 1 for all others.
    """
    num_thread = _half_cpu_count()

    save_path_test = 'i_vectors/test/'
    shutil.rmtree(save_path_test, ignore_errors=True)
    # makedirs also creates 'i_vectors/' when it does not exist yet.
    os.makedirs(save_path_test)

    for f in os.listdir(wav_folder_path):
        source_file = os.path.join(wav_folder_path, f)
        # Skip sub-folders and other non-regular entries; sox cannot convert them.
        if os.path.isfile(source_file):
            convert(source_file, save_path_test)

    # 'i_vectors/test/' -> 'i_vectors/feat/test/'
    extract_folder = save_path_test
    extract_folder_list = extract_folder.split('/')
    extract_folder_list.insert(1, 'feat')
    feat_folder = '/'.join(extract_folder_list)

    shutil.rmtree(feat_folder, ignore_errors=True)

    extractor = sidekit.FeaturesExtractor(audio_filename_structure=os.path.join(extract_folder, '{}.wav'),
                                          feature_filename_structure=os.path.join(feat_folder, '{}.h5'),
                                          sampling_frequency=8000,
                                          lower_frequency=200,
                                          higher_frequency=3700,
                                          filter_bank="log",
                                          filter_bank_size=24,
                                          window_size=0.025,
                                          shift=0.01,
                                          ceps_number=20,
                                          vad="snr",
                                          snr=40,
                                          pre_emphasis=0.97,
                                          save_param=["vad", "energy", "cep", "fb"],
                                          keep_all_features=True)

    # Sorted so that the processing order does not depend on the file system.
    filenames = sorted(f.split('.')[0] for f in os.listdir(extract_folder))
    channel_list = np.zeros(len(filenames), dtype=np.int8)
    extractor.save_list(show_list=filenames,
                        channel_list=channel_list,
                        num_thread=num_thread)

    fs_test = get_feature_server(os.path.join(feat_folder, '{}.h5'))
    # Only shows whose features were actually written take part from here on.
    ubm_list_test = sorted(f.split('.h5')[0] for f in os.listdir(feat_folder))

    ubm = sidekit.Mixture(ubm_path)

    leftids_test = np.array(['eng' if f.startswith('english') else 'oth' for f in ubm_list_test])
    rightids_test = np.array(ubm_list_test)

    idmap_test = get_idmap(leftids_test, rightids_test)
    save_stat(idmap_test, fs_test, ubm, 512, num_thread, stat_ubm_path)

    fa = sidekit.FactorAnalyser(t_matrix_path)
    ivectors_test = extract_ivectors(fa, ubm, stat_ubm_path, num_thread, len(ubm_list_test)+20)

    X = ivectors_test.stat1
    y = np.array([0 if i == 'eng' else 1 for i in leftids_test])

    return X, y


def get_xgboost_predictions(X, model_path='xgb/xgb_model.model'):
    """Score a feature matrix with a stored XGBoost model.

    Args:
        X: Data wrapped in an ``xgb.DMatrix`` before prediction.
        model_path: Model file loaded into an ``xgb.Booster``.

    Returns:
        The output of ``Booster.predict`` for ``X``.
    """
    booster = xgb.Booster(model_file=model_path)
    return booster.predict(xgb.DMatrix(X))

In [4]:
def main(wav_folder_path):
    """Run the full pipeline on a folder of audio files.

    Args:
        wav_folder_path: Folder handed to ``get_ivectors_from_wav``.

    Returns:
        Tuple ``(preds, y)``: the XGBoost predictions for the extracted
        i-vectors and the labels derived from the file names.
    """
    features, labels = get_ivectors_from_wav(wav_folder_path)
    return get_xgboost_predictions(features), labels

In [5]:
# Ask for the folder with the audio files to classify. Surrounding whitespace
# (common when a path is pasted) is dropped so the folder lookup does not fail.
wav_folder_path = input("Path to the folder with audio files: ").strip()

preds, y = main(wav_folder_path)

lid_test_small
