# Wilcoxon Signed-Rank Test

In [27]:
import json
import pandas as pd
import numpy as np
import scipy
from scipy.stats import wilcoxon


### Try Wilcoxon

In [28]:
# Toy sanity check of scipy's Wilcoxon signed-rank test on a list of paired
# differences. All differences are positive, so the two-sided test (the
# default `alternative`) should give a very small p-value.
d = [6, 8, 14, 16, 23, 24, 28, 29, 41, 49, 56, 60, 75]
w, p = wilcoxon(d)
p

0.000244140625

## Analyze

In [29]:
# Load the RQ2 result manifest. The encoding is pinned to UTF-8 so that the
# file is decoded the same way regardless of the platform's default locale.
with open('result/RQ2.json', 'r', encoding='utf-8') as f:
    data = json.load(f)


In [30]:
# Experiment grid: every (ASR model, dataset, tool) combination is loaded.
asrs = ["quartznet", "hubert", "wav2vec-base"]
datasets = ["YBAA", "ZHAA", "ASI", "TNI", "NCC",
            "TXHC", "EBVS", "ERMS", "YDCK", "YKWK", "THV", "TLV"]
tools = ["random", "triphone_rich", "pure_diversity", "icassp_without_diversity_enhancing_real_mix", "icassp_real_mix",
         "asrevolve_error_model_real", "word_error_real_mix/no_word_enhance", "word_error_real_mix/word_enhance"]

# res[asr][dataset][tool] is the DataFrame parsed by pd.read_csv from the entry
# stored under the same keys in `data` (presumably a CSV file path -- confirm
# against result/RQ2.json). A missing key raises KeyError.
res = {}
for asr in asrs:
    res[asr] = {}
    for dataset in datasets:
        res[asr][dataset] = {}
        for tool in tools:
            res[asr][dataset][tool] = pd.read_csv(data[asr][dataset][tool])


### 1. Analyze the experiment grouped by \<ASR Model\>

In [31]:
def get_wer_result(df):
    """Flatten the three per-seed WER columns of ``df`` into a single list.

    The values of ``WER_Seed1``, ``WER_Seed2`` and ``WER_Seed3`` are
    concatenated in that order.

    Args:
        df: Mapping-like object (e.g. a DataFrame) indexable by the three
            column names above.

    Returns:
        A list containing the values of the three columns back to back.

    Raises:
        AssertionError: If the three columns do not all have the same length.
    """
    seed_columns = ("WER_Seed1", "WER_Seed2", "WER_Seed3")
    per_seed = [list(df[name]) for name in seed_columns]
    arr = [value for column in per_seed for value in column]
    # Every column must contribute exactly one third of the pooled values.
    for column in per_seed:
        assert len(arr) == 3 * len(column)
    return arr

# Significance level below which the null hypothesis of the Wilcoxon
# signed-rank test is rejected.
ALPHA = 0.05

# wer_p_value[asr] holds, for each ASR model, a tool-by-tool matrix of 0/1
# flags: 1 when the paired two-sided Wilcoxon test between the pooled WER
# values of the two tools yields p < ALPHA, 0 otherwise (and 0 on the diagonal).
wer_p_value = {}
for asr in asrs:
    # Pool the WER values of all datasets once per tool instead of rebuilding
    # the same lists for every pair of tools.
    pooled_wer = {
        tool: [wer
               for dataset in datasets
               for wer in get_wer_result(res[asr][dataset][tool])]
        for tool in tools
    }

    values = {}
    for tool_1 in tools:
        values[tool_1] = {}
        for tool_2 in tools:
            if tool_1 == tool_2:
                # A tool is never compared against itself.
                values[tool_1][tool_2] = 0
            else:
                w, p = scipy.stats.wilcoxon(pooled_wer[tool_1], pooled_wer[tool_2])
                # Reject the null hypothesis at a significance level of 5%.
                values[tool_1][tool_2] = 1 if p < ALPHA else 0

    df = pd.DataFrame(data=values)
    # Keep the matrix so it can be used after the loop; previously only an
    # empty DataFrame was stored and the result was printed and discarded.
    wer_p_value[asr] = df
    print(df)
            


                                             random  triphone_rich  \
random                                            0              0   
triphone_rich                                     0              0   
pure_diversity                                    1              0   
icassp_without_diversity_enhancing_real_mix       0              1   
icassp_real_mix                                   0              0   
asrevolve_error_model_real                        0              0   
word_error_real_mix/no_word_enhance               0              1   
word_error_real_mix/word_enhance                  1              1   

                                             pure_diversity  \
random                                                    1   
triphone_rich                                             0   
pure_diversity                                            0   
icassp_without_diversity_enhancing_real_mix               1   
icassp_real_mix                                       