# Homework 4 Aligner

## Test LLR Initialization

## Load the package and dataset <a id='part-0'>

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import optparse, sys, os, logging
from collections import defaultdict
from itertools import islice
import time
import math

In [3]:
opts_datadir, opts_fileprefix = "data", "hansards"
opts_french, opts_english = "fr", "en"
# opts_datadir, opts_fileprefix = "data", "europarl"
# opts_french, opts_english = "de", "en"

In [4]:
opts_num_sents = 100000

In [5]:
f_data = "%s.%s" % (os.path.join(opts_datadir, opts_fileprefix), opts_french)
e_data = "%s.%s" % (os.path.join(opts_datadir, opts_fileprefix), opts_english)

In [6]:
bitext = [[sentence.strip().split() for sentence in pair] \
          for pair in islice(zip(open(f_data,encoding="utf8"), open(e_data,encoding="utf8")), opts_num_sents)]

In [7]:
# usage: theta = LLR_initialization(bitext)
def LLR_initialization(bitext, exp = 1, isReverse=False): 
    if isReverse:
        f=1
        e=0
    else:
        f=0
        e=1
    LLRs = defaultdict(float)
    sum_LLRs = defaultdict(float)
    
    t = set()
    s = set()
    
    t_count = defaultdict(int) # occurance of each words in t
    s_count = defaultdict(int) # occurance of each words in s
    ts_count = defaultdict(int) # occurance of each (t, s) pair
    
    for pair in bitext:
        t = t.union(set(pair[f]))
        s = s.union(set(pair[e]))
        
        for t_i in set(pair[f]):
            t_count[t_i] += 1
            for s_i in set(pair[e]):
                ts_count[(t_i, s_i)] += 1
            
        for s_i in set(pair[e]):
            s_count[s_i] += 1
      
    sum_ts_count = sum(ts_count.values())
    sum_t_count = sum(t_count.values())
    sum_s_count = sum(s_count.values())

    for (t_j, s_j) in ts_count.keys():
        p_ts = ts_count[(t_j, s_j)] / sum_ts_count
        p_t = t_count[t_j] / sum_t_count
        p_s = s_count[s_j] / sum_s_count
        
        if p_ts > p_t * p_s:
            # calculate LLR
            
            # (t? == t) and (s? == s)
            count_for_LLR_1 = ts_count[(t_j, s_j)]
            if count_for_LLR_1 != 0:
                LLR_1 = count_for_LLR_1 * math.log10((p_ts/p_s)/p_t)
            
            # (t? == t) and (s? == not s)
            count_for_LLR_2 = (t_count[t_j] - ts_count[(t_j, s_j)])
            if count_for_LLR_2 != 0:
                LLR_2 = count_for_LLR_2 * math.log10(((count_for_LLR_2 / sum_ts_count)/(1-p_s))/p_t)
            
            # (t? == not t) and (s? == s)
            count_for_LLR_3 = (s_count[s_j] - ts_count[(t_j, s_j)]) 
            if count_for_LLR_3 != 0:
                LLR_3 = count_for_LLR_3 * math.log10(((count_for_LLR_3 / sum_ts_count)/p_s)/(1-p_t))
            
            # (t? == not t) and (s? == not s)
            count_for_LLR_4 = sum_ts_count - count_for_LLR_1 - count_for_LLR_2 - count_for_LLR_3
            if count_for_LLR_4 != 0:
                LLR_4 = count_for_LLR_4 * math.log10(((count_for_LLR_4 / sum_ts_count)/(1-p_s))/(1-p_t))
            
            LLRs[(t_j, s_j)] = LLR_1 + LLR_2 + LLR_3 + LLR_4
        else:
            # p(t, s) <= p(t) * p(s), so initialize to uniform distribution
            LLRs[(t_j, s_j)] = 1.0 / len(t) 
            
        # for each source word, compute the sum of the LLR scores over all target words
        sum_LLRs[s_j] += LLRs[(t_j, s_j)]
            
       
    
    # then divide every LLR score by the single largest of these sums
                  
    largest = max(sum_LLRs.values()) 

    for i in LLRs.keys():
        LLRs[i] = LLRs[i] / largest
        
    # raise each LLR score to an empirically optimized exponent
    for i in LLRs.keys():
        LLRs[i] = LLRs[i] ** exp
               
    return LLRs

## Baseline 

At the very beginning, I implemented the baseline according to the assignment description provided by the professor. It achieved 0.3417 of AER.

In [8]:
sys.stderr.write("Training with Expectation Maximization...\n")

Training with Expectation Maximization...


In [9]:
%%time
# f is the French word set
# e is the English word set
# f_count is the word count dictionary for French word set
# N is the number of sentences
f = set()
e = set()
f_count = defaultdict(int)
for pair in bitext:
    f = f.union(set(pair[0]))
    e = e.union(set(pair[1]))
    for f_i in set(pair[0]):
        f_count[f_i] += 1
N = len(bitext)

CPU times: user 1min 48s, sys: 0 ns, total: 1min 48s
Wall time: 1min 48s


* $k = 0$<br>
* Initialize $t_0$ **## Easy choice: initialize uniformly ##**<br>
* repeat <br>
    * $k$ += 1 <br>
    * Initialize all counts to zero <br>
    * for each $(\textbf{f}, \textbf{e})$ in ${\cal D}$ <br>
        * for each $f_i$ in $\textbf{f}$ <br>
            * $Z$ = 0 **## Z commonly denotes a normalization term ##** <br>
            * for each $e_j$ in $\textbf{e}$ <br>
                * $Z$ += $t_{k-1}(f_i \mid e_j)$ <br>
            * for each $e_j$ in $\textbf{e}$ <br>
                * `c` = $ t_{k-1}(f_i \mid e_j) / Z $ <br>
                * count($f_i$, $e_j$) += `c` <br>
                * count($e_j$) += `c` <br>
    * for each ($f$, $e$) in count <br>
        * Set new parameters: $t_k(f \mid e)$ =  count($f,e$) / count($e$) <br>
* until k = 5

In [10]:
k = 0
# initialize theta uniformly
num_f = len(f_count)
theta = LLR_initialization(bitext)# defaultdict(lambda: 1./num_f)
while k < 5:
    k += 1
    tic = time.time()
    sys.stderr.write(f"Iteration {k}.................................\n")
    e_count = defaultdict(int)
    fe_count = defaultdict(int)
    for n in range(N):
        for f_i in bitext[n][0]:
            Z = 0
            for e_j in bitext[n][1]:
                Z += theta[(f_i, e_j)]
            for e_j in bitext[n][1]:
                c = theta[(f_i, e_j)] / Z
                fe_count[(f_i, e_j)] += c
                e_count[e_j] += c
    for (f_i, e_j) in fe_count.keys():
        theta[(f_i, e_j)] = fe_count[(f_i, e_j)] / e_count[e_j]
    toc = time.time()
    sys.stderr.write(f"Iteration {k} finished. Time cost: {toc-tic}\n")

Iteration 1.................................
Iteration 1 finished. Time cost: 86.66741967201233
Iteration 2.................................
Iteration 2 finished. Time cost: 87.8349084854126
Iteration 3.................................
Iteration 3 finished. Time cost: 87.54906606674194
Iteration 4.................................
Iteration 4 finished. Time cost: 87.58197975158691
Iteration 5.................................
Iteration 5 finished. Time cost: 88.14286184310913


* for each $(\textbf{f}, \textbf{e})$ in ${\cal D}$
    * for each $f_i$ in $\textbf{f}$
        * `bestp` = 0
        * `bestj` = 0
        * for each $e_j$ in $\textbf{e}$
            * if $t(f_i \mid e_j)$ > `bestp`
                * `bestp` = $t(f_i \mid e_j)$
                * `bestj` = $j$
        * align $f_i$ to $e_{\texttt{bestj}}$

In [11]:
sys.stderr.write("Aligning...\n")

Aligning...


In [12]:
%%capture --no-stderr dice_a
for f, e in bitext:
    for i in range(len(f)):
        f_i = f[i]
        bestp = 0
        bestj = 0
        for j in range(len(e)):
            e_j = e[j]
            if theta[(f_i, e_j)] > bestp:
                bestp = theta[(f_i, e_j)]
                bestj = j
        sys.stdout.write(f"{i}-{bestj} ")
    sys.stdout.write("\n")

In [13]:
# dump the output to the local file dice.a
with open('dice.a','w') as fh:
    fh.write(str(dice_a))

In [14]:
#%run check-alignments.py -i dice.a

In [15]:
%run score-alignments.py -n 0 -i dice.a

Precision = 0.553022
Recall = 0.702576
AER = 0.395796


## Add n smoothing 

In this trial, I added n smoothing to the baseline. The AER was improved from 0.34 to 0.3124. The value of n is set to be 0.01. I didn't see much difference it can make.

In [16]:
%%time
# f is the French word set
# e is the English word set
# f_count is the word count dictionary for French word set
# N is the number of sentences
f = set()
e = set()
f_count = defaultdict(int)
for pair in bitext:
    f = f.union(set(pair[0]))
    e = e.union(set(pair[1]))
    for f_i in set(pair[0]):
        f_count[f_i] += 1
N = len(bitext)

CPU times: user 1min 47s, sys: 15.6 ms, total: 1min 47s
Wall time: 1min 47s


In [17]:
# add n smoothing
smooth_n = 0.01
vocab_N = 100000

In [18]:
k = 0
# initialize theta uniformly
num_f = len(f_count)
theta = LLR_initialization(bitext)#defaultdict(lambda: 1./num_f)
while k < 5:
    k += 1
    tic = time.time()
    sys.stderr.write(f"Iteration {k}.................................\n")
    e_count = defaultdict(int)
    fe_count = defaultdict(int)
    for n in range(N):
        for f_i in bitext[n][0]:
            Z = 0
            for e_j in bitext[n][1]:
                Z += theta[(f_i, e_j)]
            for e_j in bitext[n][1]:
                c = theta[(f_i, e_j)] / Z
                fe_count[(f_i, e_j)] += c
                e_count[e_j] += c
    for (f_i, e_j) in fe_count.keys():
        theta[(f_i, e_j)] = (fe_count[(f_i, e_j)] + smooth_n) / (e_count[e_j] + vocab_N * smooth_n)
    toc = time.time()
    sys.stderr.write(f"Iteration {k} finished. Time cost: {toc-tic}\n")

Iteration 1.................................
Iteration 1 finished. Time cost: 87.42067885398865
Iteration 2.................................
Iteration 2 finished. Time cost: 87.03539776802063
Iteration 3.................................
Iteration 3 finished. Time cost: 87.75008034706116
Iteration 4.................................
Iteration 4 finished. Time cost: 86.99113941192627
Iteration 5.................................
Iteration 5 finished. Time cost: 87.16883683204651


In [19]:
%%capture --no-stderr dice_a
for f, e in bitext:
    for i in range(len(f)):
        f_i = f[i]
        bestp = 0
        bestj = 0
        for j in range(len(e)):
            e_j = e[j]
            if theta[(f_i, e_j)] > bestp:
                bestp = theta[(f_i, e_j)]
                bestj = j
        sys.stdout.write(f"{i}-{bestj} ")
    sys.stdout.write("\n")

In [20]:
# dump the output to the local file dice.a
with open('dice.a','w',encoding='utf8') as fh:
    fh.write(str(dice_a))

In [21]:
%run score-alignments.py -n 0 -i dice.a

Precision = 0.622858
Recall = 0.808073
AER = 0.313755


## Use posterior probabilities + add n smoothing

In this trial, I used posterior probabilities intead of argmax method. Depending on the value of the threshold, the precision and recall can be differed. There is always a tradeoff between precision and recall while adjusting the threshold. I indeed saw an improvement of AER.

In [22]:
%%time
# f is the French word set
# e is the English word set
# f_count is the word count dictionary for French word set
# N is the number of sentences
f = set()
e = set()
f_count = defaultdict(int)
for pair in bitext:
    f = f.union(set(pair[0]))
    e = e.union(set(pair[1]))
    for f_i in set(pair[0]):
        f_count[f_i] += 1
N = len(bitext)

CPU times: user 1min 47s, sys: 0 ns, total: 1min 47s
Wall time: 1min 47s


In [23]:
# add n smoothing
smooth_n = 0.01
vocab_N = 100000

In [24]:
k = 0
# initialize theta uniformly
num_f = len(f_count)
theta = LLR_initialization(bitext)#defaultdict(lambda: 1./num_f)
while k < 5:
    k += 1
    tic = time.time()
    sys.stderr.write(f"Iteration {k}.................................\n")
    e_count = defaultdict(int)
    fe_count = defaultdict(int)
    for n in range(N):
        for f_i in bitext[n][0]:
            Z = 0
            for e_j in bitext[n][1]:
                Z += theta[(f_i, e_j)]
            for e_j in bitext[n][1]:
                c = theta[(f_i, e_j)] / Z
                fe_count[(f_i, e_j)] += c
                e_count[e_j] += c
    for (f_i, e_j) in fe_count.keys():
        theta[(f_i, e_j)] = (fe_count[(f_i, e_j)] + smooth_n) / (e_count[e_j] + vocab_N * smooth_n)
    toc = time.time()
    sys.stderr.write(f"Iteration {k} finished. Time cost: {toc-tic}\n")

Iteration 1.................................
Iteration 1 finished. Time cost: 87.26017236709595
Iteration 2.................................
Iteration 2 finished. Time cost: 87.55622863769531
Iteration 3.................................
Iteration 3 finished. Time cost: 87.71975445747375
Iteration 4.................................
Iteration 4 finished. Time cost: 87.12346315383911
Iteration 5.................................
Iteration 5 finished. Time cost: 87.53006911277771


* for each $(\textbf{f},\textbf{e})$ in ${\cal D}$
    * for each $f_i$ in $\textbf{f}$
        * $Z = 0$
        * for each $e_j$ in $\textbf{e}$
            * $Z += t(f_i∣e_j)$
        * for each $e_j$ in $\textbf{e}$
            * $posterior$ = $t(f_i∣e_j)/ Z$
            * if $(posterior > \delta)$ keep alignment between $f_i$ and $e_j$

In [25]:
%%capture --no-stderr dice_a

# delta is the threshold we used to decide whether to keep the alignment pair
delta = 0.3

for f, e in bitext:
    for i, f_i in enumerate(f):
        Z = 0
        for j, e_j in enumerate(e):
            Z += theta[(f_i, e_j)]
        for j, e_j in enumerate(e):
            posterior = theta[(f_i, e_j)] / Z
            if posterior >= delta:
                sys.stdout.write(f"{i}-{j} ")
    sys.stdout.write("\n")

In [26]:
# dump the output to the local file dice.a
with open('dice.a','w',encoding='utf8') as fh:
    fh.write(str(dice_a))

In [27]:
%run score-alignments.py -n 0 -i dice.a

Precision = 0.634238
Recall = 0.832343
AER = 0.297843


## Intersection of alignments from two directions + add n smoothing

In this trial, I tried to build two models from both translation directions and used the intersection of two alignment sets as final alignment. The AER was improved dramatically.

In [28]:
%%time
# f is the French word set
# e is the English word set
# f_count is the word count dictionary for French word set
# e_count is the word count dictionary for English word set
# N is the number of sentences
f = set()
e = set()
f_count = defaultdict(int)
e_count = defaultdict(int)
for pair in bitext:
    f = f.union(set(pair[0]))
    e = e.union(set(pair[1]))
    for f_i in set(pair[0]):
        f_count[f_i] += 1
    for e_j in set(pair[1]):
        e_count[e_j] += 1
N = len(bitext)

CPU times: user 1min 47s, sys: 15.6 ms, total: 1min 47s
Wall time: 1min 47s


In [29]:
num_f = len(f_count)
num_e = len(e_count)

# add n smoothing
smooth_n = 0.01
vocab_N = 100000

In [32]:
def align(num, N, isReverse=False):
    '''
    num: size of target language word count dict
    N: number of sentences
    isReverse: if the translation direction is reversed
    '''
    k = 0
    # initialize theta uniformly
    theta = LLR_initialization(bitext,1,isReverse)#defaultdict(lambda: 1./num)
    while k < 5:
        k += 1
        tic = time.time()
        sys.stderr.write(f"Iteration {k}.................................\n")
        e_count = defaultdict(int)
        fe_count = defaultdict(int)
        if isReverse:
            for n in range(N):
                for f_i in bitext[n][1]:
                    Z = 0
                    for e_j in bitext[n][0]:
                        Z += theta[(f_i, e_j)]
                    for e_j in bitext[n][0]:
                        c = theta[(f_i, e_j)] / Z
                        fe_count[(f_i, e_j)] += c
                        e_count[e_j] += c
        else:
            for n in range(N):
                for f_i in bitext[n][0]:
                    Z = 0
                    for e_j in bitext[n][1]:
                        Z += theta[(f_i, e_j)]
                    for e_j in bitext[n][1]:
                        c = theta[(f_i, e_j)] / Z
                        fe_count[(f_i, e_j)] += c
                        e_count[e_j] += c
        for (f_i, e_j) in fe_count.keys():
            theta[(f_i, e_j)] = (fe_count[(f_i, e_j)] + smooth_n) / (e_count[e_j] + vocab_N * smooth_n)
        toc = time.time()
        sys.stderr.write(f"Iteration {k} finished. Time cost: {toc-tic}\n")
    return theta

In [33]:
theta_e2f = align(num_f, N, False)
theta_f2e = align(num_e, N, True)

Iteration 1.................................
Iteration 1 finished. Time cost: 70.76888418197632
Iteration 2.................................
Iteration 2 finished. Time cost: 71.99081039428711
Iteration 3.................................
Iteration 3 finished. Time cost: 72.3838472366333
Iteration 4.................................
Iteration 4 finished. Time cost: 71.63812279701233
Iteration 5.................................
Iteration 5 finished. Time cost: 71.80967497825623
Iteration 1.................................
Iteration 1 finished. Time cost: 68.51897931098938
Iteration 2.................................
Iteration 2 finished. Time cost: 69.1212432384491
Iteration 3.................................
Iteration 3 finished. Time cost: 68.7495653629303
Iteration 4.................................
Iteration 4 finished. Time cost: 68.99606847763062
Iteration 5.................................
Iteration 5 finished. Time cost: 69.08950090408325


In [34]:
%%capture --no-stderr dice_a
for f, e in bitext:
    set_e2f = set()
    set_f2e = set()
    for i in range(len(f)):
        f_i = f[i]
        bestp = 0
        bestj = 0
        for j in range(len(e)):
            e_j = e[j]
            if theta_e2f[(f_i, e_j)] > bestp:
                bestp = theta_e2f[(f_i, e_j)]
                bestj = j
        set_e2f.add((i, bestj))        
    for j in range(len(e)):
        e_j = e[j]
        bestp = 0
        besti = 0
        for i in range(len(f)):
            f_i = f[i]
            if theta_f2e[(e_j, f_i)] > bestp:
                bestp = theta_f2e[(e_j, f_i)]
                besti = i
        set_f2e.add((besti,j))
    set_combined = set_f2e.intersection(set_e2f)
    
    for pair in set_combined:
        sys.stdout.write(f"{pair[0]}-{pair[1]} ")
    sys.stdout.write("\n")

In [35]:
# dump the output to the local file dice.a
with open('dice.a','w',encoding='utf8') as fh:
    fh.write(str(dice_a))

In [36]:
%run score-alignments.py -n 0 -i dice.a

Precision = 0.845532
Recall = 0.724121
AER = 0.213048


## Alignment by agreement + add n smoothing + posterier probability

In this trial, I combined everything I've tried, including adding n smoothing, applying posterior probabilities while aligning, and applying alignment by two independent models. I experimented several thresholds (see below). As you may have noted, the threshold is almost the square of the threshold used in previous trial. The reason is we are thresholding the product of two posterior probabilities. Eventually, the optimized one was determined to be 0.08. I also tried different n (in add n smoothing) (results not shown). That won't make much difference on AER. So I simply set it to 0.01 as specified in the paper.

    n = 0.01 
    Threshold | AER score | Precision | Recall
    0.04      | 0.215656  | 0.747634  | 0.841010
    0.06      | 0.197782  | 0.800442  | 0.804606
    0.07      | 0.194017  | 0.817271  | 0.791481
    0.08      | 0.192571  | 0.829273  | 0.780337
    0.09      | 0.194620  | 0.836773  | 0.767707
    0.10      | 0.196451  | 0.844369  | 0.756067

In [37]:
%%time
# f is the French word set
# e is the English word set
# f_count is the word count dictionary for French word set
# e_count is the word count dictionary for English word set
# N is the number of sentences
f = set()
e = set()
f_count = defaultdict(int)
e_count = defaultdict(int)
for pair in bitext:
    f = f.union(set(pair[0]))
    e = e.union(set(pair[1]))
    for f_i in set(pair[0]):
        f_count[f_i] += 1
    for e_j in set(pair[1]):
        e_count[e_j] += 1
N = len(bitext)

CPU times: user 1min 48s, sys: 0 ns, total: 1min 48s
Wall time: 1min 48s


In [38]:
num_f = len(f_count)
num_e = len(e_count)

# add n smoothing
smooth_n = 0.01
vocab_N = 100000

In [39]:
def align(num, N, isReverse=False):
    '''
    num: size of target language word count dict
    N: number of sentences
    isReverse: if the translation direction is reversed
    '''
    k = 0
    # initialize theta uniformly
    theta = LLR_initialization(bitext,1,isReverse)#defaultdict(lambda: 1./num)
    while k < 5:
        k += 1
        tic = time.time()
        sys.stderr.write(f"Iteration {k}.................................\n")
        e_count = defaultdict(int)
        fe_count = defaultdict(int)
        if isReverse:
            for n in range(N):
                for f_i in bitext[n][1]:
                    Z = 0
                    for e_j in bitext[n][0]:
                        Z += theta[(f_i, e_j)]
                    for e_j in bitext[n][0]:
                        c = theta[(f_i, e_j)] / Z
                        fe_count[(f_i, e_j)] += c
                        e_count[e_j] += c
        else:
            for n in range(N):
                for f_i in bitext[n][0]:
                    Z = 0
                    for e_j in bitext[n][1]:
                        Z += theta[(f_i, e_j)]
                    for e_j in bitext[n][1]:
                        c = theta[(f_i, e_j)] / Z
                        fe_count[(f_i, e_j)] += c
                        e_count[e_j] += c
        for (f_i, e_j) in fe_count.keys():
            theta[(f_i, e_j)] = (fe_count[(f_i, e_j)] + smooth_n) / (e_count[e_j] + vocab_N * smooth_n)
        toc = time.time()
        sys.stderr.write(f"Iteration {k} finished. Time cost: {toc-tic}\n")
    return theta

In [40]:
theta_e2f = align(num_f, N, False)
theta_f2e = align(num_e, N, True)

Iteration 1.................................
Iteration 1 finished. Time cost: 69.84036207199097
Iteration 2.................................
Iteration 2 finished. Time cost: 70.30485582351685
Iteration 3.................................
Iteration 3 finished. Time cost: 70.22057962417603
Iteration 4.................................
Iteration 4 finished. Time cost: 70.36179447174072
Iteration 5.................................
Iteration 5 finished. Time cost: 70.32945227622986
Iteration 1.................................
Iteration 1 finished. Time cost: 69.34241795539856
Iteration 2.................................
Iteration 2 finished. Time cost: 69.60418105125427
Iteration 3.................................
Iteration 3 finished. Time cost: 69.29814672470093
Iteration 4.................................
Iteration 4 finished. Time cost: 69.81924319267273
Iteration 5.................................
Iteration 5 finished. Time cost: 69.46430253982544


In [41]:
%%capture --no-stderr dice_a

# delta is the threshold we used to decide whether to keep the alignment pair
delta = 0.08

for f, e in bitext:
    posterior_e2f = defaultdict(float)
    posterior_f2e = defaultdict(float)
    for i, f_i in enumerate(f):
        Z = 0
        for j, e_j in enumerate(e):
            Z += theta_e2f[(f_i, e_j)]
        for j, e_j in enumerate(e):
            posterior_e2f[(i,j)] = theta_e2f[(f_i, e_j)] / Z
    
    for j, e_j in enumerate(e):
        Z = 0
        for i, f_i in enumerate(f):
            Z += theta_f2e[(e_j, f_i)]
        for i, f_i in enumerate(f):
            posterior_f2e[(j,i)] = theta_f2e[(e_j, f_i)] / Z

    for pair in posterior_e2f.keys():
        posterior = posterior_e2f[pair] * posterior_f2e[(pair[1],pair[0])]
        if posterior > delta:
            sys.stdout.write(f"{pair[0]}-{pair[1]} ")
    sys.stdout.write("\n")

In [42]:
# dump the output to the local file dice.a
with open('dice.a','w',encoding='utf8') as fh:
    fh.write(str(dice_a))

In [43]:
%run score-alignments.py -n 0 -i dice.a

Precision = 0.723117
Recall = 0.841753
AER = 0.230146
