# Decoding CTC output

In [1]:
import pickle
import torch


# Load precomputed CTC output
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open this file if it comes from a trusted source.
with open('mystery_records.pickle', 'rb') as f:
    batch = pickle.load(f)

# log probabilities of softmax layers [batch_size, T, vocab_size]
log_probs = batch["log_probs"]

# Dictionary with index to character mapping
ind2char = batch["ind2char"]

# Special EMPTY (CTC blank) token: its character and its index in ind2char.
# Both constants are read as globals by the decoding functions defined below.
EMPTY_TOK = '^'
EMPTY_IND = 0

In [7]:
# Shape check; the layout documented at load time is [batch_size, T, vocab_size].
log_probs.size()

torch.Size([2, 655, 28])

In [4]:
# Display the index -> character vocabulary (index 0 is the EMPTY token).
ind2char

{0: '^',
 1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z',
 27: ' '}

In [9]:
def ctc_decode(inds, ind2char):
    """Collapse a sequence of per-frame token indices into text.

    Consecutive repetitions of the same index are merged into one, and the
    EMPTY index (global ``EMPTY_IND``) is dropped after merging, so an EMPTY
    between two equal characters keeps both of them.

    Args:
        inds: iterable of token indices, one per time step.
        ind2char: mapping from token index to its character.

    Returns:
        The decoded string.
    """
    chars = []
    prev_ind = EMPTY_IND  # starting from EMPTY lets any first real token through
    for cur_ind in inds:
        if cur_ind != prev_ind:
            # A new run starts here; only non-EMPTY runs produce a character.
            if cur_ind != EMPTY_IND:
                chars.append(ind2char[cur_ind])
            prev_ind = cur_ind
    return "".join(chars)

# Greedy decoding: pick the most likely token at every time step (argmax over
# the last dimension) and collapse the resulting index sequence with ctc_decode.
for i, rec in enumerate(log_probs):
    text = ctc_decode(rec.argmax(-1).numpy(), ind2char)
    print(f"{i}) {text}")

0) we nostrngesto love you know therols and so do i a foll commitment what i thinking of you wolden get this from any ather guy
1)  never gona give you up never donelet you down never go arun around and deset you never gon a make you cri never gonna say good by


# Computing WER and CER
Task: Implement WER and CER metrics

In [10]:
# library for fast calculation of edit distance
import editdistance

def calc_wer(target_text: str, pred_text: str):
    """Word error rate of ``pred_text`` against ``target_text``.

    Both texts are split on whitespace; the result is the word-level edit
    distance divided by the number of target words. It can exceed 1 when the
    prediction contains many extra words.

    Args:
        target_text: reference transcription.
        pred_text: predicted transcription.

    Returns:
        The word error rate, or 1 when the target contains no words.
    """
    # Guard on the word list rather than on the raw string: a target made only
    # of whitespace is non-empty but splits into zero words, which would
    # otherwise lead to a division by zero.
    target_words = target_text.split() if target_text else []
    if not target_words:
        return 1
    return editdistance.eval(target_words, pred_text.split()) / len(target_words)


def calc_cer(target_text: str, pred_text: str):
    """Character error rate of ``pred_text`` against ``target_text``.

    Args:
        target_text: reference transcription.
        pred_text: predicted transcription.

    Returns:
        Character-level edit distance divided by the target length, or 1 when
        the target is empty.
    """
    if not target_text:
        return 1
    n_target_chars = len(target_text)
    n_char_edits = editdistance.eval(target_text, pred_text)
    return n_char_edits / n_target_chars

In [11]:
import numpy as np

# Sanity-check calc_wer / calc_cer on (target, prediction, expected WER,
# expected CER) cases; values are compared with an absolute tolerance of 1e-3.
for target, pred, expected_wer, expected_cer in [
    # partially wrong prediction
    ("if you can not measure it you can not improve it",
     "if you can nt measure t yo can not i",
     0.454, 0.25),
    # exact match -> both metrics are zero
    ("if you cant describe what you are doing as a process you dont know what youre doing",
     "if you cant describe what you are doing as a process you dont know what youre doing",
     0.0, 0.0),
    # prediction with dropped words
    ("one measurement is worth a thousand expert opinions",
     "one  is worth thousand opinions",
     0.375, 0.392)
]:
    wer = calc_wer(target, pred)
    cer = calc_cer(target, pred)
    assert np.isclose(wer, expected_wer, atol=1e-3), f"true: {target}, pred: {pred}, expected wer {expected_wer} != your wer {wer}"
    assert np.isclose(cer, expected_cer, atol=1e-3), f"true: {target}, pred: {pred}, expected cer {expected_cer} != your cer {cer}"


Task: come up with pairs of target/prediction texts such that:
1) WER > 1.0
2) CER > WER

In [12]:
# 1) WER > 1.0
# A one-word target against a five-word prediction needs four insertions,
# so WER = 4 / 1.
target, prediction = "a" , "a a a a a"
assert calc_wer(target, prediction) > 1.0

# 2) CER > WER
# Only one of the three words is wrong (WER = 1/3), but the wrong word is long
# compared with the 5-character target, so the character-level distance
# (6 edits) exceeds the target length.
target, prediction = "a a a", "bbbbbb a a"
assert calc_wer(target, prediction) < calc_cer(target, prediction)

# Beam search
Task: implement beam-search on CTC outputs

In [13]:
# Load precomputed CTC output
# NOTE: this rebinds `batch`, `log_probs` and `ind2char` from the first cell,
# so everything below works on the new batch.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open this file if it comes from a trusted source.
with open('lj_batch.pickle', 'rb') as f:
    batch = pickle.load(f)

# log probabilities of softmax layers [batch_size, T, vocab_size]
log_probs = batch["log_probs"]

# Dictionary with index to character mapping
ind2char = batch["ind2char"]

# Reference transcriptions, indexed in step with log_probs in the comparison cell
true_texts = batch["text"]

In [17]:
# Convert log-probabilities to probabilities.
# NOTE(review): `probs` is only inspected for its size; the beam-search cell
# recomputes .exp() per record instead of reusing it.
probs = log_probs.exp()

In [18]:
# Shape check; the layout documented at load time is [batch_size, T, vocab_size].
probs.size()

torch.Size([10, 310, 28])

In [42]:
from collections import defaultdict
from tqdm import tqdm

def expand_and_merge_paths(dp, next_token_probs, ind2char):
    """Advance every beam state by one time step and merge equal states.

    Args:
        dp: mapping ``(prefix, last_char) -> probability`` of the current
            states, where ``last_char`` is the token emitted at the previous
            step (possibly ``EMPTY_TOK``).
        next_token_probs: per-token probabilities for the new time step,
            indexed by token index.
        ind2char: mapping from token index to its character.

    Returns:
        A ``defaultdict(float)`` with the same key layout, holding the summed
        probability of every state reachable after this step.
    """
    merged = defaultdict(float)
    for token_ind, token_prob in enumerate(next_token_probs):
        token_char = ind2char[token_ind]
        for (prefix, prev_char), path_prob in dp.items():
            # Repeating the previous token or emitting EMPTY leaves the text
            # unchanged; any other token extends it by one character.
            text_unchanged = prev_char == token_char or token_char == EMPTY_TOK
            next_prefix = prefix if text_unchanged else prefix + token_char
            merged[(next_prefix, token_char)] += path_prob * token_prob
    return merged

def truncate_paths(dp, beam_size):
    """Keep only the ``beam_size`` most probable states.

    Args:
        dp: mapping from state to probability.
        beam_size: maximum number of states to keep.

    Returns:
        A plain dict with the kept states, ordered by decreasing probability
        (ties keep their original relative order).
    """
    ranked_states = sorted(dp.items(), key=lambda state: -state[1])
    return dict(ranked_states[:beam_size])

def ctc_beam_search(probs, beam_size, ind2char):
    """Beam-search decoding over per-frame CTC token probabilities.

    Args:
        probs: iterable of per-frame probability vectors (indexed by token
            index), one per time step.
        beam_size: number of states kept after every time step.
        ind2char: mapping from token index to its character.

    Returns:
        A list of ``(text, probability)`` pairs sorted by decreasing
        probability, with each text appearing exactly once.
    """
    # Beam state: (decoded prefix, token emitted at the previous step).
    beams = {("", EMPTY_TOK): 1.0}
    for frame_probs in probs:
        beams = expand_and_merge_paths(beams, frame_probs, ind2char)
        beams = truncate_paths(beams, beam_size)

    # Several surviving states can carry the same text and differ only in the
    # last emitted token (e.g. the final character vs. EMPTY_TOK). Sum them so
    # each hypothesis is reported once, with its full probability, before
    # ranking; otherwise duplicates appear and the ordering is skewed.
    text_probs = {}
    for (prefix, _), proba in beams.items():
        if prefix in text_probs:
            text_probs[prefix] += proba
        else:
            text_probs[prefix] = proba
    return sorted(text_probs.items(), key=lambda item: -item[1])

# Run beam search (beam size 100) on every record of the batch.
# bs_results[i] holds the hypotheses for record i, best first.
bs_results = []
for log_probs_line in log_probs:
    # ctc_beam_search multiplies probabilities, so convert from log space first.
    bs_results.append(ctc_beam_search(log_probs_line.exp().numpy(), 100, ind2char))

In [47]:
# For every record, print the reference text, the greedy (argmax) decoding and
# the top-3 beam-search hypotheses, each with its CER against the reference.
for i in range(len(true_texts)):
    beam_search_hypos = bs_results[i][:3]
    true_text = true_texts[i]
    argmax_text = ctc_decode(log_probs[i].numpy().argmax(-1), ind2char)
    print("True: ", true_text)
    print(f"Argmax: {argmax_text} --- (CER: {calc_cer(true_text, argmax_text):.3f})")
    # `score` (the hypothesis probability) is unpacked but not printed.
    for ind, (hypo, score) in enumerate(beam_search_hypos):
        print(f"{ind+1}) '{hypo}' --- (CER: {calc_cer(true_text, hypo):.3f})")
    print('-' * 100)

True:  he would go to her and tell her all his family complications
Argmax: he wld ge toher iand tell her all mhisan ly omblications --- (CER: 0.200)
1) 'he wl ge to her iand tell her all hisan ly omblications' --- (CER: 0.183)
2) 'he wl ge to her and tell her all hisan ly omblications' --- (CER: 0.167)
3) 'he wl ge to her iand tell her all hisanly omblications' --- (CER: 0.183)
----------------------------------------------------------------------------------------------------
True:  he did not say the last as a boast but merely as an assurance to the liveryman who he saw was anxious on his account
Argmax: he did not sad the last is a bost but mearlioves an asurance to the livery man who re saw was anxes on his account --- (CER: 0.129)
1) 'he did not say the last is a bost but merli oves an a surance to the livery man who re saw was anxes on his account' --- (CER: 0.112)
2) 'he did not say the last as a bost but merli oves an a surance to the livery man who re saw was anxes on his acc