In [1]:
import numpy as np
import pandas as pd

In [46]:
# Root folder of the kappa experiments; inputs and outputs live in sub-folders of it.
base_dir = "/Users/lmatayoshi/Documents/Projects/tesis_notebooks/kappa_experiments/"
# eers_dir: folder the result files are read from; csvs_dir: folder the CSV tables are written to.
eers_dir, csvs_dir = (base_dir + subfolder for subfolder in ("eers/", "csvs/"))

In [20]:
# Phonemes reported in every table; this list fixes the row order of the matrices built below.
kappa_phonemes = [
    'G', 'b', 'w', 'B',
    'D', 'm', 'i', 's',
]

In [21]:
def kappa_positives_negatives_dict():
    """Return the hard-coded per-phoneme counts as three dicts.

    Returns:
        A tuple ``(total_dict, positives_dict, negatives_dict)``; each dict
        maps a phoneme symbol to an integer count. For every phoneme in the
        table below, positives + negatives equals total.
    """
    # One row per phoneme: (phoneme, total, positives, negatives).
    counts_table = (
        ('G', 865, 222, 643),
        ('b', 923, 528, 395),
        ('w', 1243, 743, 500),
        ('B', 1597, 428, 1169),
        ('D', 2929, 920, 2009),
        ('m', 3920, 3234, 686),
        ('i', 6167, 4929, 1238),
        ('s', 8035, 7555, 480),
    )

    total_dict = {}
    positives_dict = {}
    negatives_dict = {}
    for phoneme, total, positives, negatives in counts_table:
        total_dict[phoneme] = total
        positives_dict[phoneme] = positives
        negatives_dict[phoneme] = negatives
    return (total_dict, positives_dict, negatives_dict)

# Main

In [49]:
def _read_eers(path):
    """Parse one results file into a ``{key: value}`` dict of strings.

    Each line is stripped and split on ``":"``; only lines that yield exactly
    two fields are kept, every other line is silently ignored.
    """
    with open(path, "r") as f:
        fields = [line.strip().split(":") for line in f]
    return dict(pair for pair in fields if len(pair) == 2)


def main(eers_dir, csv_filename, values):
    """Collect per-phoneme results for several settings into one table.

    For every entry ``v`` of ``values`` the file ``eers_dir + str(v)`` is read
    and the number stored under each phoneme of ``kappa_phonemes`` becomes one
    column labelled ``v``. The per-phoneme counts from
    ``kappa_positives_negatives_dict()`` are appended as the columns
    ``corrects``, ``incorrects`` and ``total``. Rows are sorted by ``total``.

    Args:
        eers_dir: directory prefix holding one results file per value; it is
            concatenated as-is with ``str(v)``, so it must end with a path
            separator.
        csv_filename: destination of the CSV export (values rounded to 3
            decimals, index column labelled ``Phonemes``).
        values: iterable of settings (a list or a ``range``); used both as
            file names and as column labels.

    Returns:
        The unrounded ``pandas.DataFrame``, indexed by phoneme.

    Raises:
        KeyError: if a results file has no entry for one of ``kappa_phonemes``.
        ValueError: if an entry cannot be converted with ``float``.
    """
    # Materialise once: lets callers pass a range object (list concatenation
    # below would fail on it in Python 3) and fixes the column order.
    values = list(values)

    # Walk `values` itself rather than the keys of an intermediate dict: on
    # interpreters without ordered dicts the key order is arbitrary, which
    # attached the wrong label to the columns.
    eer_columns = []
    for v in values:
        eers = _read_eers(eers_dir + str(v))
        eer_columns.append([float(eers[phoneme]) for phoneme in kappa_phonemes])

    kappa_total_dict, kappa_positives_dict, kappa_negatives_dict = kappa_positives_negatives_dict()
    count_columns = [
        [counts[phoneme] for phoneme in kappa_phonemes]
        for counts in (kappa_positives_dict, kappa_negatives_dict, kappa_total_dict)
    ]

    # Build a purely numeric matrix (one row per phoneme) instead of
    # round-tripping the numbers through a string array.
    matrix = np.column_stack(eer_columns + count_columns)

    dataframe = pd.DataFrame(
        matrix,
        index=kappa_phonemes,
        columns=values + ['corrects', 'incorrects', 'total'],
    )
    # Builtin float: the `np.float` alias no longer exists in current NumPy.
    dataframe = dataframe.astype(float)
    dataframe = dataframe.sort_values(by=['total'])
    dataframe.round(3).to_csv(path_or_buf=csv_filename, index_label="Phonemes")
    return dataframe


In [50]:
# Table for the Legendre degree sweep (result files named 0..6).
main(
    eers_dir + "legendre_best_degree_eers/",
    csvs_dir + "legendre_best_degree.csv",
    # Pass a real list: main() concatenates `values` with a list of column
    # names, which fails for a bare range object on Python 3.
    list(range(7))
)

Unnamed: 0,0,1,2,3,4,5,6,corrects,incorrects,total
G,0.231726,0.227061,0.220809,0.233281,0.235838,0.233281,0.233281,222.0,643.0,865.0
b,0.2,0.174242,0.174684,0.189394,0.195076,0.191288,0.183112,528.0,395.0,923.0
w,0.161837,0.158,0.157102,0.156,0.16,0.165899,0.168,743.0,500.0,1243.0
B,0.225834,0.221963,0.218991,0.224299,0.221557,0.227545,0.235981,428.0,1169.0,1597.0
D,0.250871,0.248382,0.243902,0.240916,0.245652,0.247826,0.252862,920.0,2009.0,2929.0
m,0.190962,0.163265,0.156085,0.155498,0.157434,0.1625,0.164723,3234.0,686.0,3920.0
i,0.260905,0.25705,0.258777,0.254007,0.254211,0.258673,0.259485,4929.0,1238.0,6167.0
s,0.376704,0.328427,0.31875,0.3,0.295833,0.29636,0.304167,7555.0,480.0,8035.0


In [52]:
# Table for the DCT coefficient-count sweep (result files named 1..10).
main(
    eers_dir + "dct_n_coefficients_eers/",
    csvs_dir + "dct_n_coefficients.csv",
    # Pass a real list: main() concatenates `values` with a list of column
    # names, which fails for a bare range object on Python 3.
    list(range(1, 11))
)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,corrects,incorrects,total
G,0.241058,0.232327,0.234234,0.243243,0.248036,0.238739,0.238739,0.24379,0.250052,0.233766,222.0,643.0,865.0
b,0.201985,0.17803,0.187342,0.186349,0.182278,0.191288,0.1875,0.196669,0.200758,0.202532,528.0,395.0,923.0
w,0.172,0.170695,0.169583,0.172275,0.17,0.174,0.170929,0.170929,0.174056,0.176085,743.0,500.0,1243.0
B,0.238666,0.228826,0.235981,0.238318,0.238519,0.23781,0.235244,0.232094,0.236099,0.231308,428.0,1169.0,1597.0
D,0.26087,0.254351,0.245893,0.252254,0.250455,0.254348,0.26087,0.260826,0.265804,0.265217,920.0,2009.0,2929.0
m,0.1812,0.157081,0.156772,0.163884,0.167638,0.16772,0.16744,0.168522,0.171614,0.171923,3234.0,686.0,3920.0
i,0.27706,0.276527,0.270598,0.26979,0.272875,0.271252,0.27044,0.275444,0.27846,0.279483,4929.0,1238.0,6167.0
s,0.3375,0.314105,0.313538,0.312004,0.312111,0.314063,0.314583,0.320833,0.320833,0.325,7555.0,480.0,8035.0


In [53]:
# Table for the Legendre alpha sweep (result files named 0.1 .. 1.0).
main(
    eers_dir=eers_dir + "legendre_best_alpha_eers/",
    csv_filename=csvs_dir + "legendre_best_alpha.csv",
    values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
)

Unnamed: 0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,corrects,incorrects,total
G,0.22395,0.222395,0.225505,0.220721,0.216216,0.223071,0.224124,0.222395,0.225225,0.22395,222.0,643.0,865.0
b,0.179747,0.186177,0.175515,0.176136,0.1875,0.174363,0.183712,0.183612,0.189873,0.18481,528.0,395.0,923.0
w,0.158666,0.158816,0.160097,0.158,0.160162,0.161507,0.159162,0.158348,0.161507,0.16142,743.0,500.0,1243.0
B,0.221963,0.225423,0.226408,0.227055,0.221963,0.227371,0.222412,0.2284,0.224123,0.221963,428.0,1169.0,1597.0
D,0.240014,0.24888,0.234783,0.242907,0.244898,0.238043,0.240014,0.23913,0.246889,0.244803,920.0,2009.0,2929.0
m,0.161741,0.166181,0.161808,0.16035,0.166181,0.161581,0.162028,0.16035,0.16517,0.162415,3234.0,686.0,3920.0
i,0.256058,0.259289,0.255224,0.253635,0.256866,0.253222,0.256058,0.25563,0.258006,0.255224,4929.0,1238.0,6167.0
s,0.325,0.31875,0.312984,0.321305,0.325,0.320833,0.330756,0.304167,0.324024,0.329167,7555.0,480.0,8035.0


In [54]:
# Table for the supervectors + Legendre feature combination (result files named 0.0 .. 1.0).
main(
    eers_dir=eers_dir + "features_combination_supervectors_legendre_eers/",
    csv_filename=csvs_dir + "features_combination_supervectors_legendre.csv",
    values=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
)

Unnamed: 0,0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,corrects,incorrects,total
G,0.162162,0.189189,0.167963,0.183515,0.202703,0.198198,0.193694,0.174184,0.166539,0.202703,0.200622,222.0,643.0,865.0
b,0.121519,0.162025,0.136709,0.153409,0.173825,0.16468,0.163004,0.14264,0.125,0.170455,0.164557,528.0,395.0,923.0
w,0.150605,0.149394,0.154724,0.152086,0.154,0.15,0.149394,0.152086,0.156,0.152086,0.15074,743.0,500.0,1243.0
B,0.205607,0.226867,0.209396,0.222412,0.224979,0.221557,0.222992,0.214953,0.198598,0.224123,0.221963,428.0,1169.0,1597.0
D,0.181682,0.207609,0.189149,0.201593,0.221503,0.219333,0.213043,0.192391,0.181522,0.22001,0.217279,920.0,2009.0,2929.0
m,0.153989,0.151515,0.147186,0.150146,0.154519,0.154519,0.15315,0.147959,0.145022,0.154917,0.152752,3234.0,686.0,3920.0
i,0.254443,0.254616,0.257739,0.25525,0.252827,0.25421,0.255021,0.254413,0.258481,0.253446,0.253635,4929.0,1238.0,6167.0
s,0.325444,0.300993,0.299026,0.302083,0.3,0.298345,0.30086,0.302846,0.296492,0.298743,0.299694,7555.0,480.0,8035.0


In [51]:
# Table for the supervectors + DCT feature combination (result files named 0.0 .. 1.0).
main(
    eers_dir=eers_dir + "features_combination_supervectors_dct_eers/",
    csv_filename=csvs_dir + "features_combination_supervectors_dct.csv",
    values=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
)

Unnamed: 0,0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,corrects,incorrects,total
G,0.162162,0.191291,0.175676,0.19038,0.211509,0.207207,0.192773,0.189189,0.174184,0.207207,0.206843,222.0,643.0,865.0
b,0.121519,0.162025,0.141282,0.156962,0.177215,0.167089,0.162025,0.151899,0.126582,0.172152,0.164359,528.0,395.0,923.0
w,0.150605,0.157738,0.15121,0.156,0.163315,0.16,0.158773,0.15,0.150442,0.162853,0.158,743.0,500.0,1243.0
B,0.205607,0.216424,0.203271,0.213003,0.231308,0.224979,0.217282,0.21028,0.201882,0.229256,0.222295,428.0,1169.0,1597.0
D,0.181682,0.218019,0.190144,0.212359,0.233449,0.231458,0.221982,0.201593,0.182781,0.230435,0.223992,920.0,2009.0,2929.0
m,0.153989,0.15102,0.145714,0.144811,0.153061,0.153989,0.152041,0.145462,0.140723,0.153989,0.15337,3234.0,686.0,3920.0
i,0.254443,0.260097,0.250404,0.255224,0.268174,0.267367,0.263131,0.253398,0.251775,0.268174,0.263328,4929.0,1238.0,6167.0
s,0.325444,0.298312,0.289583,0.297816,0.301787,0.3,0.303111,0.294748,0.283333,0.29996,0.304167,7555.0,480.0,8035.0


# Step by step

In [19]:
# Paths for the step-by-step walkthrough of a single experiment.
# NOTE(review): this rebinds `base_dir` / `eers_dir`, which the cells of the
# "Main" section also read, and -- unlike the configuration cell at the top --
# the paths here have no "eers/" or "csvs/" sub-folder. Confirm which layout
# is the current one.
base_dir = "/Users/lmatayoshi/Documents/Projects/tesis_notebooks/kappa_experiments/"
eers_dir, csv_filename = (
    base_dir + leaf
    for leaf in (
        "features_combination_supervectors_legendre_eers/",
        "features_combination_supervectors_legendre.csv",
    )
)

In [22]:
# 0.0, 0.1, ..., 1.0 -- dividing the integers 0..10 by 10.0 yields exactly the
# same floats as the decimal literals.
proportions = list(tenth / 10.0 for tenth in range(11))

In [23]:
# One slot per proportion, initialised to None until its results file is read.
eers_by_proportion = dict.fromkeys(proportions)

In [24]:
# Read the results file of every proportion (file name = str(proportion)).
# Each line is split on ":"; only lines that yield exactly two fields are
# kept and stored as a {key: value} dict of strings.
for p in proportions:
    with open(eers_dir + str(p), "r") as f:
        split_lines = (raw_line.strip().split(":") for raw_line in f)
        eers_by_proportion[p] = dict(
            fields for fields in split_lines if len(fields) == 2
        )

In [25]:
# Per-phoneme counts, turned into (n_phonemes, 1) column vectors whose rows
# follow the order of `kappa_phonemes`.
kappa_total_dict, kappa_positives_dict, kappa_negatives_dict = kappa_positives_negatives_dict()
kappa_total, kappa_positives, kappa_negatives = (
    np.array([counts[phoneme] for phoneme in kappa_phonemes]).reshape(-1, 1)
    for counts in (kappa_total_dict, kappa_positives_dict, kappa_negatives_dict)
)

In [26]:
# Seed the result matrix with a single column holding the phoneme labels.
base_matrix = np.array(kappa_phonemes)[:, np.newaxis]

In [27]:
# Assemble the full matrix: phoneme labels | one column per proportion | counts.
#
# The columns are produced by walking `proportions` itself (not the dict's
# keys) because the DataFrame built from this matrix labels them with
# `proportions`; on interpreters without ordered dicts the key order is
# arbitrary and the labels would not match the data.
#
# The matrix is rebuilt from scratch so that re-running this cell does not
# append the same columns a second time.
label_column = np.array(kappa_phonemes).reshape(-1, 1)
eer_columns = [
    np.array(
        [float(eers_by_proportion[p][phoneme]) for phoneme in kappa_phonemes]
    ).reshape(-1, 1)
    for p in proportions
]
base_matrix = np.hstack(
    [label_column] + eer_columns + [kappa_positives, kappa_negatives, kappa_total]
)

In [28]:
# Drop the label column (the labels become the index), convert everything to
# float, order the rows by the `total` count and export rounded to 3 decimals.
dataframe = pd.DataFrame(
    base_matrix[:, 1:],
    index=kappa_phonemes,
    columns=list(proportions) + ['corrects', 'incorrects', 'total'],
)
# Builtin float: the `np.float` alias no longer exists in current NumPy
# (the stray trailing character that made this line a syntax error is gone too).
dataframe = dataframe.astype(float)
dataframe = dataframe.sort_values(by=['total'])
dataframe.round(3).to_csv(path_or_buf=csv_filename, index_label="Phonemes")


In [56]:
# Underscore-joined phoneme list; reuses `kappa_phonemes` so this string
# cannot drift from the list the tables are built from.
"_".join(kappa_phonemes)

'G_b_w_B_D_m_i_s'