In [1]:
from kaldi.decoder import LatticeFasterOnlineDecoder, LatticeFasterDecoderOptions, DecodableMatrixScaled
from kaldi.fstext import SymbolTable, StdVectorFst, read_fst_kaldi, utils
from kaldi.util.table import SequentialMatrixReader
from kaldi.matrix import Matrix

In [2]:
from kaldi.fstext.utils import get_linear_symbol_sequence

In [3]:
import re
import time
import pickle
import numpy as np

In [4]:
from edit_distance import SequenceMatcher
import torch
from neural_decoder.dataset import SpeechDataset

import matplotlib.pyplot as plt

In [5]:
from neural_decoder.neural_decoder_trainer import getDatasetLoaders
from neural_decoder.neural_decoder_trainer import loadModel

In [6]:
#not needed since not using LLM model
import torch
import huggingface_hub
from transformers import AutoTokenizer, AutoModelForCausalLM 

In [7]:
import torch.nn.functional as F
import os
import numpy as np
import math

In [8]:
class DecodableTensorScaled:
    """Accumulates per-frame log-probability vectors for a decoder.

    Frames are appended one at a time with `AcceptLoglikes`; `SetFinish`
    marks the end of the utterance. `reset` clears all stored state.

    Attributes:
        acoustic_scale: multiplier applied to values returned by `LogLikelihood`.
        num_frames_ready: number of frames accepted so far (plain int attribute;
            other code in this file reads it as `decodable.num_frames_ready`).
        done: True once `SetFinish` has been called.
        logp: list with one array of log-probabilities per accepted frame.
    """

    def __init__(self, acoustic_scale: float = 1.0):
        """Create an empty store.

        Args:
            acoustic_scale: scale applied in `LogLikelihood`. Defaults to 1.0 so
                that the zero-argument construction used elsewhere keeps working.
        """
        self.acoustic_scale = acoustic_scale
        self.reset()

    def reset(self):
        """Drop every stored frame and clear the finished flag."""
        self.num_frames_ready = 0
        self.done = False
        self.logp = []

    def NumFramesReady(self):
        """Return the number of frames accepted so far.

        The accessor is not called `num_frames_ready` because an instance
        attribute with that name already exists and would shadow the method.
        """
        return self.num_frames_ready

    def IsLastFrame(self, frame):
        """Return True when `frame` is the final frame of a finished utterance."""
        assert frame < self.num_frames_ready
        return self.done and (frame == self.num_frames_ready - 1)

    def LogLikelihood(self, frame, index):
        """Return the scaled log-probability of class `index` at `frame`.

        `index` is treated as a 0-based position in the frame vector.
        NOTE(review): Kaldi decodable objects conventionally receive 1-based
        labels — confirm the convention before wiring this into a decoder.
        """
        assert 0 <= frame < self.num_frames_ready
        frame_logp = self.logp[frame]
        assert 0 <= index < len(frame_logp)
        return self.acoustic_scale * frame_logp[index]

    def AcceptLoglikes(self, logp):
        """Append one frame of log-probabilities.

        Args:
            logp: a torch tensor (converted with `.numpy()`) or anything
                `np.asarray` accepts.
        """
        self.num_frames_ready += 1
        frame = logp.numpy() if hasattr(logp, "numpy") else np.asarray(logp)
        self.logp.append(frame)

    def SetFinish(self):
        """Mark the utterance as complete."""
        self.done = True




In [9]:
class CtcWfstBeamSearch:
    """CTC search over a decoding graph using `LatticeFasterOnlineDecoder`.

    `search` filters incoming frames (dropping frames whose `self.blank`
    probability exceeds the skip threshold), feeds the kept frames to the
    lattice decoder and stores the current best path. `finalize_search`
    finalizes the decoder and stores the final best path.

    Results are exposed through parallel lists:
        inputs:     per hypothesis, the collapsed input-label sequence (label - 1).
        outputs:    per hypothesis, the output-label (word id) sequence.
        likelihood: per hypothesis, `-(weight.value1 + weight.value2)`.
        times:      per hypothesis, original frame index of each kept input label
                    (only filled by `finalize_search`).

    NOTE(review): `self.blank` is 1, whereas `lm_decode` in this notebook applies
    the blank penalty to column 0 — confirm which column holds the CTC blank.
    """

    def __init__(self, fst_, opts: "LatticeFasterDecoderOptions", symbol_table: "SymbolTable",
                 acoustic_scale, nbest, verbose=True):
        """Build the decoder.

        Args:
            fst_: decoding graph passed to `LatticeFasterOnlineDecoder`.
            opts: lattice decoder options.
            symbol_table: word symbol table (stored, not used by this class).
            acoustic_scale: scale passed to `DecodableMatrixScaled`.
            nbest: requested number of hypotheses (stored; only the single best
                path is currently extracted).
            verbose: when True (default) print the diagnostic lines.
        """
        self.decodable = DecodableTensorScaled()
        self.decoder = LatticeFasterOnlineDecoder(fst_, opts)
        self.symbol_table = symbol_table
        self.opts = opts
        self.nbest = nbest
        self.blank_scale = 1
        self.blank_skip_thresh = 0.95
        self.blank = 1
        self.acoustic_scale = acoustic_scale
        self.verbose = verbose
        self.reset()

    def _log(self, *parts):
        """Print diagnostics only when `self.verbose` is set."""
        if self.verbose:
            print(*parts)

    def reset(self):
        """Clear all per-utterance state and re-initialise the lattice decoder."""
        self.num_frames = 0
        self.decoded_frames_mapping = []
        self.last_best = 0
        self.inputs = []
        self.outputs = []
        self.likelihood = []
        self.times = []
        self.decodable.reset()
        self.is_last_frame_blank = False
        # Cleared here so a skipped frame from a previous utterance can never leak in.
        self.last_frame_prob = None
        self.decoder.init_decoding()

    def decode_matrix(self):
        """Advance the lattice decoder over every frame accepted so far.

        Raises:
            ValueError: if no frame has been accepted yet.
        """
        if self.decodable.num_frames_ready == 0:
            raise ValueError("No frames to decode!")
        m = Matrix(np.array(self.decodable.logp))
        self._log("shape: ", m.numpy().shape)
        dms = DecodableMatrixScaled(m, self.acoustic_scale)
        self._log("num indices: ", dms.num_indices())
        self.decoder.advance_decoding(dms)

    def search(self, logp):
        """Consume a chunk of frames and refresh the partial best path.

        Args:
            logp: sequence of per-frame log-probability vectors. An empty
                sequence is a no-op.
        """
        if len(logp) == 0:
            return
        for frame_logp in logp:
            blank_score = np.exp(frame_logp[self.blank])
            if blank_score > self.blank_skip_thresh * self.blank_scale:
                # Confident blank frame: skip it but remember it (see below).
                self.is_last_frame_blank = True
                self.last_frame_prob = frame_logp
            else:
                cur_best = np.argmax(frame_logp)
                if (cur_best != self.blank and self.is_last_frame_blank
                        and cur_best == self.last_best):
                    # The same label repeats across a skipped frame: re-insert
                    # that frame so the two occurrences are not merged.
                    self.decodable.AcceptLoglikes(self.last_frame_prob)
                    self.decoded_frames_mapping.append(self.num_frames - 1)
                self.last_best = cur_best
                self.decodable.AcceptLoglikes(frame_logp)
                self.decoded_frames_mapping.append(self.num_frames)
                self.is_last_frame_blank = False
            self.num_frames += 1

        # When every frame so far was skipped there is nothing to decode;
        # calling decode_matrix() would raise, so just report no hypothesis.
        if self.decodable.num_frames_ready > 0:
            self.decode_matrix()
        self.inputs.clear()
        self.outputs.clear()
        self.likelihood.clear()
        self._log("decoded frames length: ", len(self.decoded_frames_mapping))
        if len(self.decoded_frames_mapping) > 0:
            self.inputs.append([])
            self.outputs.append([])
            self.likelihood.append(0)
            lat = self.decoder.get_best_path()
            alignment, words, weight = get_linear_symbol_sequence(lat)
            self._log("words: ", words)
            self.convert_to_inputs(alignment)
            self.outputs[0] = words
            self.likelihood[0] = -(weight.value1 + weight.value2)

    def finalize_search(self):
        """Finalize decoding and store the final hypotheses.

        Only the single best path is extracted, whatever `self.nbest` is;
        n-best extraction from the lattice is not implemented.
        """
        self.decodable.SetFinish()
        self.decoder.finalize_decoding()
        self.inputs.clear()
        self.outputs.clear()
        self.likelihood.clear()
        self.times.clear()
        if len(self.decoded_frames_mapping) > 0:
            nbest_lats = [self.decoder.get_best_path()]

            nbest = len(nbest_lats)
            self.inputs = [[] for _ in range(nbest)]
            self.outputs = [[] for _ in range(nbest)]
            self.likelihood = [0.0 for _ in range(nbest)]
            self.times = [[] for _ in range(nbest)]
            self._log("nbest: ", len(nbest_lats))
            for i, nbest_lat in enumerate(nbest_lats):
                self._log("nbl: ", nbest_lat)
                alignment, words, weight = get_linear_symbol_sequence(nbest_lat)
                self._log("words?: ", alignment, " ", words, " ", weight)
                # Write into slot i (the original always overwrote slot 0).
                self.convert_to_inputs(alignment, self.times[i], index=i)
                self.outputs[i] = words
                self.likelihood[i] = -(weight.value1 + weight.value2)

    def convert_to_inputs(self, alignment, time=None, index=0):
        """Collapse an alignment into `self.inputs[index]`.

        Entries whose value minus one equals `self.blank` are dropped, as are
        entries equal to their immediate predecessor; kept entries are stored
        as `label - 1`.

        Args:
            alignment: input-label sequence of a path.
            time: optional list, cleared and then filled with
                `self.decoded_frames_mapping[pos]` for every kept position.
            index: hypothesis slot in `self.inputs` to fill (default 0).
        """
        target = self.inputs[index]
        target.clear()
        if time is not None:
            time.clear()
        for cur in range(len(alignment)):
            if alignment[cur] - 1 == self.blank:
                continue
            if cur > 0 and alignment[cur] == alignment[cur - 1]:
                continue
            target.append(alignment[cur] - 1)
            if time is not None:
                time.append(self.decoded_frames_mapping[cur])


In [10]:
class DecodeResult:
    """One decoded hypothesis: its text and its two scores."""

    def __init__(self):
        # Scores start at zero and the sentence starts empty; the decoder fills them in.
        self.lm_score, self.ac_score = 0.0, 0.0
        self.sentence = ""

In [11]:
class PyKaldiDecoder:
    """Wrapper that decodes log-probabilities into sentences.

    Loads `TLG.fst` and `words.txt` from a model directory, builds a
    `CtcWfstBeamSearch`, and turns its hypotheses into `DecodeResult` objects
    kept in `self.results`.
    """

    def __init__(self, model_path, acoustic_scale=0.5, nbest=100, beam=18):
        """Load the graph and symbol table and configure the search.

        Args:
            model_path: directory containing `TLG.fst` and `words.txt`.
            acoustic_scale: forwarded to the search; also used to derive `ac_score`.
            nbest: forwarded to the search.
            beam: decoding beam set on the lattice decoder options.

        Raises:
            ValueError: if `TLG.fst` is missing from `model_path`.
        """
        graph_file = os.path.join(model_path, "TLG.fst")
        if not os.path.exists(graph_file):
            raise ValueError(f"TLG.fst not found in {model_path}")
        self.fst = StdVectorFst.read(graph_file)
        self.acoustic_scale = acoustic_scale

        decoder_opts = LatticeFasterDecoderOptions()
        decoder_opts.beam = beam
        decoder_opts.max_active = 7000
        decoder_opts.min_active = 200
        decoder_opts.lattice_beam = 8

        vocab_file = os.path.join(model_path, "words.txt")
        self.symbol_table = SymbolTable.read_text(vocab_file)
        self.decoder = CtcWfstBeamSearch(
            self.fst, decoder_opts, self.symbol_table, acoustic_scale, nbest
        )
        self.results = []

    def decode(self, logp):
        """Reset the search, run it over `logp`, and refresh `self.results`."""
        self.decoder.reset()
        self.decoder.search(logp)
        self.updateResult()

    def finishdecoding(self):
        """Finalize the search and refresh `self.results`."""
        self.decoder.finalize_search()
        self.updateResult()

    def updateResult(self):
        """Rebuild `self.results` from the search's current hypotheses.

        NOTE(review): the search exposes one combined score per hypothesis, so
        `lm_score` receives that combined value and `ac_score` the same value
        divided by `acoustic_scale` — confirm this split is what is intended.
        """
        hypotheses = self.decoder.outputs
        scores = self.decoder.likelihood
        self.results.clear()

        assert len(hypotheses) == len(scores)
        for idx, tokens in enumerate(hypotheses):
            entry = DecodeResult()
            entry.lm_score = scores[idx]
            entry.ac_score = scores[idx] / self.acoustic_scale
            # Map each word id to its symbol and join them with single spaces.
            entry.sentence = " ".join(
                f"{self.symbol_table.find_symbol(token)}" for token in tokens
            ).strip()
            self.results.append(entry)

        if self.results and self.results[0].sentence:
            print(f"Partial CTC result: {self.results[0].sentence}")

    def get_results(self):
        """Return the list of `DecodeResult` objects from the last update."""
        return self.results


In [12]:
def rearrange_speech_logits(logits, has_sil=False):
    """Move trailing class column(s) of a 3-D logits array to the front.

    Args:
        logits: array indexed as `[:, :, class]`.
        has_sil: when False, only the last class column is moved to the front;
            when True, the last column comes first, the second-to-last comes
            second, and the remaining columns follow in their original order.

    Returns:
        A new array with the class axis reordered.
    """
    last = logits[:, :, -1:]
    if has_sil:
        second_last = logits[:, :, -2:-1]
        remainder = logits[:, :, :-2]
        return np.concatenate([last, second_last, remainder], axis=-1)
    return np.concatenate([last, logits[:, :, :-1]], axis=-1)

In [13]:
def lm_decode(pydecoder, logits, returnNBest=False, rescore=False, blankPenalty=0.0, logPriors=None):
    """Decode one utterance of logits with `pydecoder`.

    The logits are turned into log-probabilities with a softmax over the last
    axis, `logPriors` is subtracted, `blankPenalty` is subtracted from column 0,
    and the result is passed to `pydecoder.decode`.

    Args:
        pydecoder: object exposing `decode(log_probs)` and a `results` list whose
            items have `sentence`, `ac_score` and `lm_score` attributes.
        logits: 2-D numpy array.
        returnNBest: when True return every hypothesis as a tuple; otherwise
            return only the first sentence.
        rescore: accepted for call compatibility; not used here.
        blankPenalty: value subtracted from the column-0 log-probability.
        logPriors: optional log-priors broadcastable against the log-probabilities
            (array or tensor). None means zeros. Previously this argument was
            silently overwritten with zeros.

    Returns:
        A list of `(sentence, ac_score, lm_score)` tuples when `returnNBest` is
        True; otherwise the first sentence, or "" if the decoder produced none.
    """
    assert len(logits.shape) == 2
    if logPriors is None:
        # float64 zeros, as before, so the default path is numerically unchanged.
        logPriors = np.zeros([1, logits.shape[1]])
    logPriors = torch.as_tensor(logPriors)

    log_probs = F.log_softmax(torch.from_numpy(logits), dim=-1)
    log_probs = log_probs - logPriors
    # apply blank penalty
    log_probs[:, 0:1] = log_probs[:, 0:1] - blankPenalty

    pydecoder.decode(log_probs)
    results = pydecoder.results
    if returnNBest:
        return [(r.sentence, r.ac_score, r.lm_score) for r in results]
    # Guard against an empty hypothesis list instead of raising IndexError.
    return results[0].sentence if results else ""


In [14]:
# Configuration dictionary; this notebook reads 'datasetPath' and 'batchSize'
# from it when building the dataset loaders. ('outputDir' is intentionally not set.)
args = {
    'datasetPath': '/home/iris/project_3_bci/workload_characterization/id20_neural_decode/data/competition_data/ptDecoder_ctc',
    'seqLen': 150,
    'maxTimeSeriesLen': 1200,
    'batchSize': 8,  # 64
    'lrStart': 0.02,
    'lrEnd': 0.02,
    'nUnits': 1024,
    'nBatch': 10000,  # 3000
    'nLayers': 5,
    'seed': 0,
    'nClasses': 40,
    'nInputFeatures': 256,
    'dropout': 0.4,
    'whiteNoiseSD': 0.8,
    'constantOffsetSD': 0.2,
    'gaussianSmoothWidth': 2.0,
    'strideLen': 4,
    'kernelLen': 32,
    'bidirectional': True,
    'l2_decay': 1e-5,
}

In [15]:
# Build the loaders and keep the raw loaded data, which later cells index by partition.
trainLoaders, testLoaders, loadedData = getDatasetLoaders(args["datasetPath"], args["batchSize"])

In [16]:
# Load the trained model on the CPU and switch it to evaluation mode.
device = "cpu"
modelPath = '/home/iris/project_3_bci/workload_characterization/id20_neural_decode/model/speechBaseline4'
model = loadModel(modelPath, device=device)
model.eval()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


GRUDecoder(
  (inputLayerNonlinearity): Softsign()
  (unfolder): Unfold(kernel_size=(32, 1), dilation=1, padding=0, stride=4)
  (gaussianSmoother): GaussianSmoothing()
  (gru_decoder): GRU(8192, 1024, num_layers=5, batch_first=True, dropout=0.4, bidirectional=True)
  (inpLayer0): Linear(in_features=256, out_features=256, bias=True)
  (inpLayer1): Linear(in_features=256, out_features=256, bias=True)
  (inpLayer2): Linear(in_features=256, out_features=256, bias=True)
  (inpLayer3): Linear(in_features=256, out_features=256, bias=True)
  (inpLayer4): Linear(in_features=256, out_features=256, bias=True)
  (inpLayer5): Linear(in_features=256, out_features=256, bias=True)
  (inpLayer6): Linear(in_features=256, out_features=256, bias=True)
  (inpLayer7): Linear(in_features=256, out_features=256, bias=True)
  (inpLayer8): Linear(in_features=256, out_features=256, bias=True)
  (inpLayer9): Linear(in_features=256, out_features=256, bias=True)
  (inpLayer10): Linear(in_features=256, out_features=2

In [17]:
# Run the model over every trial of the chosen partition and collect, per trial:
# the logits, the number of output steps, the true label sequence and the
# cleaned reference transcription.
rnn_outputs = {
    "logits": [],
    "logitLengths": [],
    "trueSeqs": [],
    "transcriptions": [],
}

partition = "competition" # "test"
if partition == "competition":
    testDayIdxs = [4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 18, 19, 20]
elif partition == "test":
    testDayIdxs = range(len(loadedData[partition]))
else:
    # Fail loudly instead of hitting a NameError on `testDayIdxs` below.
    raise ValueError(f"Unknown partition: {partition!r}")

# Inference only: disable autograd so no graph is built for each forward pass.
with torch.no_grad():
    # `i` is the position inside loadedData[partition]; `testDayIdx` is the day
    # index handed to the model.
    for i, testDayIdx in enumerate(testDayIdxs):
        test_ds = SpeechDataset([loadedData[partition][i]])
        test_loader = torch.utils.data.DataLoader(
            test_ds, batch_size=1, shuffle=False, num_workers=0
        )
        for j, (X, y, X_len, y_len, _) in enumerate(test_loader):
            X, y, X_len, y_len, dayIdx = (
                X.to(device),
                y.to(device),
                X_len.to(device),
                y_len.to(device),
                torch.tensor([testDayIdx], dtype=torch.int64).to(device),
            )
            pred = model.forward(X, dayIdx)
            # Output length after the model's kernelLen / strideLen windowing.
            adjustedLens = ((X_len - model.kernelLen) / model.strideLen).to(torch.int32)

            for iterIdx in range(pred.shape[0]):
                trueSeq = np.array(y[iterIdx][0 : y_len[iterIdx]].cpu().detach())

                rnn_outputs["logits"].append(pred[iterIdx].cpu().detach().numpy())
                rnn_outputs["logitLengths"].append(
                    adjustedLens[iterIdx].cpu().detach().item()
                )
                rnn_outputs["trueSeqs"].append(trueSeq)

            # Keep letters, hyphens, spaces and apostrophes only; drop "--"; lowercase.
            transcript = loadedData[partition][i]["transcriptions"][j].strip()
            transcript = re.sub(r"[^a-zA-Z\- \']", "", transcript)
            transcript = transcript.replace("--", "").lower()
            rnn_outputs["transcriptions"].append(transcript)


  return self.conv(input, weight=self.weight, groups=self.groups, padding="same")


In [18]:
# print(dir(LatticeFasterDecoderOptions))

In [19]:
# print(dir(DecodableTensorScaled))

In [20]:
# Directory expected to contain TLG.fst and words.txt for the n-gram decoder.
lmDir = "/home/iris/project_3_bci/workload_characterization/id20_neural_decode/model/languageModel"
ngramDecoder = PyKaldiDecoder(model_path=lmDir)

In [21]:
# LM decoding hyperparameters
# LM decoding hyperparameters
acoustic_scale = 0.5
blank_penalty = np.log(7)
llm_weight = 0.5

llm_outputs = []
# Generate nbest outputs from 5gram LM
start_t = time.time()
nbest_outputs = []
for j, logits in enumerate(rnn_outputs["logits"]):
    # Rotate the first class column to the end (blank is last token), then let
    # rearrange_speech_logits bring the last two columns to the front.
    logits = np.concatenate((logits[:, 1:], logits[:, :1]), axis=-1)
    logits = rearrange_speech_logits(logits[None, :, :], has_sil=True)
    nbest = lm_decode(
        ngramDecoder,
        logits[0],
        returnNBest=True,
        rescore=True,
        blankPenalty=blank_penalty,
    )
    nbest_outputs.append(nbest)
# Per-sample timing report is currently disabled:
# time_per_sample = (time.time() - start_t) / len(rnn_outputs["logits"])
# print(f"decoding took {time_per_sample} seconds per sample")

shape:  (42, 41)
num indices:  41
decoded frames length:  42
words:  [53919, 81014, 41602, 21600]
Partial CTC result: I'M ORIGINALLY FROM COLORADO
shape:  (76, 41)
num indices:  41
decoded frames length:  76
words:  [53551, 47579, 8999, 198, 113151, 68142, 3, 65856]
Partial CTC result: I HAD BEEN ABLE TO MAKE A LOAN
shape:  (35, 41)
num indices:  41
decoded frames length:  35
words:  [53551, 47579, 78277, 66847]
Partial CTC result: I HAD NO LUCK
shape:  (43, 41)
num indices:  41
decoded frames length:  43
words:  [53551, 112496, 112089, 113536, 120271, 12642]
Partial CTC result: I THOUGHT THE TOPIC WAS BORING
shape:  (38, 41)
num indices:  41
decoded frames length:  38
words:  [121365, 32140, 117016, 112201]
Partial CTC result: WHATEVER DRIVE UP THERE
shape:  (38, 41)
num indices:  41
decoded frames length:  38
words:  [124047, 31415, 43780, 56053]
Partial CTC result: YOU DON'T GET INVOLVED
shape:  (41, 41)
num indices:  41
decoded frames length:  41
words:  [52152, 124047, 35192, 1121

decoded frames length:  20
words:  [78277, 26628, 86836]
Partial CTC result: NO DATA POINTS
shape:  (28, 41)
num indices:  41
decoded frames length:  28
words:  [56499, 75588, 78916]
Partial CTC result: IT'S MOVING NORTH
shape:  (26, 41)
num indices:  41
decoded frames length:  26
words:  [53551, 29553]
Partial CTC result: I DIDN'T
shape:  (44, 41)
num indices:  41
decoded frames length:  44
words:  [110428, 27187]
Partial CTC result: TACTICAL DECISION
shape:  (29, 41)
num indices:  41
decoded frames length:  29
words:  [5567, 82291]
Partial CTC result: ASIA PACIFIC
shape:  (58, 41)
num indices:  41
decoded frames length:  58
words:  [76335, 65711, 80542, 56270, 20400, 17495]
Partial CTC result: MY LITTLE ONE IS CLASSIC CARS
shape:  (64, 41)
num indices:  41
decoded frames length:  64
words:  [54319, 37338, 112089, 39330, 123755, 120518, 121084, 67691]
Partial CTC result: IN FACT THE FIRST YEAR WE WERE MADE
shape:  (31, 41)
num indices:  41
decoded frames length:  31
words:  [112257, 1

decoded frames length:  40
words:  [112074, 3524, 54682, 112320]
Partial CTC result: THAT'S AN INDIVIDUAL THING
shape:  (29, 41)
num indices:  41
decoded frames length:  29
words:  [123069, 54319, 39938]
Partial CTC result: WORKING IN FLORIDA
shape:  (65, 41)
num indices:  41
decoded frames length:  65
words:  [53551, 65147, 113151, 120358, 2785, 2519, 84364]
Partial CTC result: I LIKE TO WATCH ALMOST ALL PEOPLE
shape:  (57, 41)
num indices:  41
decoded frames length:  57
words:  [71356, 121362, 124047, 114391, 112089, 98378, 44957]
Partial CTC result: ME WHAT YOU TREASURE THE SAYING GOES
shape:  (70, 41)
num indices:  41
decoded frames length:  70
words:  [112089, 69827, 120271, 54319, 3653, 86440, 5865, 112367, 86825]
Partial CTC result: THE MATCH WAS IN AND PLAYING AT THIS POINT
shape:  (104, 41)
num indices:  41
decoded frames length:  104
words:  [101577, 47579, 58084, 45479, 84780, 256, 3, 47868, 9044]
Partial CTC result: SHE HAD JUST GOT PERSONAL ABOUT A HALF-HOUR BEFORE
shape: 

decoded frames length:  52
words:  [56499, 79014, 118559, 55356]
Partial CTC result: IT'S NOT VERY INSPIRING
shape:  (63, 41)
num indices:  41
decoded frames length:  63
words:  [124047, 88660, 49168, 2519, 59876, 79959, 114426]
Partial CTC result: YOU PROBABLY HAVE ALL KINDS OF TREES
shape:  (55, 41)
num indices:  41
decoded frames length:  55
words:  [2881, 48922, 10500, 27192, 113151, 68142]
Partial CTC result: ALSO HAS BIG DECISIONS TO MAKE
shape:  (29, 41)
num indices:  41
decoded frames length:  29
words:  [35192, 65753, 76194]
Partial CTC result: ENJOY LIVE MUSIC
shape:  (43, 41)
num indices:  41
decoded frames length:  43
words:  [112201, 4912, 84364, 5240, 50356]
Partial CTC result: THERE ARE PEOPLE AROUND HERE
shape:  (54, 41)
num indices:  41
decoded frames length:  54
words:  [53551, 112326, 2519, 122852, 4912, 45005, 113151, 123047]
Partial CTC result: I THINK ALL WOMEN ARE GOING TO WORK
shape:  (52, 41)
num indices:  41
decoded frames length:  52
words:  [3653, 53551, 771

decoded frames length:  46
words:  [68801, 30913, 3653, 86228]
Partial CTC result: MANY DO AND PLACES
shape:  (62, 41)
num indices:  41
decoded frames length:  62
words:  [112063, 56270, 112089, 48547, 83244, 88660]
Partial CTC result: THAT IS THE HARDEST PART PROBABLY
shape:  (32, 41)
num indices:  41
decoded frames length:  32
words:  [3653, 63810, 112089, 59611]
Partial CTC result: AND LEAVE THE KIDS
shape:  (37, 41)
num indices:  41
decoded frames length:  37
words:  [112257, 121084, 81496, 54319, 76335, 42629]
Partial CTC result: THEY WERE OUT IN MY GARAGE
shape:  (68, 41)
num indices:  41
decoded frames length:  68
words:  [53919, 79014, 40367, 23144, 52620, 75673, 56270, 105689]
Partial CTC result: I'M NOT FOR CONTROLLING HOW MUCH IS SPENT
shape:  (30, 41)
num indices:  41
decoded frames length:  30
words:  [15659, 5444, 117286]
Partial CTC result: BUSINESS AS USUAL
shape:  (22, 41)
num indices:  41
decoded frames length:  22
words:  [112260, 36506]
Partial CTC result: THEY'RE E

decoded frames length:  45
words:  [124047, 4912, 45005, 113151, 43780, 18029, 4345]
Partial CTC result: YOU ARE GOING TO GET CAUGHT ANYWAY
shape:  (47, 41)
num indices:  41
decoded frames length:  47
words:  [112063, 70228, 56499, 79014, 77051]
Partial CTC result: THAT MAYBE IT'S NOT NECESSARY
shape:  (50, 41)
num indices:  41
decoded frames length:  50
words:  [104904, 121660, 60664, 70228, 22318]
Partial CTC result: SOMEONE WHO KNOWS MAYBE CONCLUSION
shape:  (34, 41)
num indices:  41
decoded frames length:  34
words:  [90340, 3, 10994, 79959, 33059, 120678]
Partial CTC result: QUITE A BIT OF EACH WEEK
shape:  (46, 41)
num indices:  41
decoded frames length:  46
words:  [120949, 53551, 46962, 112367, 56270, 56450]
Partial CTC result: WELL I GUESS THIS IS IT
shape:  (26, 41)
num indices:  41
decoded frames length:  26
words:  [57804, 56270, 91569, 6987]
Partial CTC result: JOY IS REALLY BAD
shape:  (75, 41)
num indices:  41
decoded frames length:  75
words:  [49337, 48922, 112089, 976

decoded frames length:  61
words:  [53551, 16563, 30913, 35256, 26219, 122580, 112089, 115668]
Partial CTC result: I CAN DO ENOUGH DAMAGE WITH THE U
shape:  (34, 41)
num indices:  41
decoded frames length:  34
words:  [116165, 99172]
Partial CTC result: UNDERGRADUATE SCHOOL
shape:  (32, 41)
num indices:  41
decoded frames length:  32
words:  [121372, 123315, 122580, 50298, 119309]
Partial CTC result: WHAT'S WRONG WITH HER VOICE
shape:  (44, 41)
num indices:  41
decoded frames length:  44
words:  [112257, 100691, 117238, 3, 65711, 12452]
Partial CTC result: THEY SEND US A LITTLE BOOKLET
shape:  (37, 41)
num indices:  41
decoded frames length:  37
words:  [5444, 21442, 15865, 115668]
Partial CTC result: AS COLLATERAL BY U
shape:  (27, 41)
num indices:  41
decoded frames length:  27
words:  [121362, 56270, 112063, 112322, 44804]
Partial CTC result: WHAT IS THAT THINGS GO
shape:  (37, 41)
num indices:  41
decoded frames length:  37
words:  [104517, 120518, 31415, 60650, 121372, 112089, 868

decoded frames length:  63
words:  [112063, 120494, 256, 21133, 120949]
Partial CTC result: THAT WAY ABOUT CODE WELL
shape:  (36, 41)
num indices:  41
decoded frames length:  36
words:  [15726, 120518, 29553, 60650, 56450]
Partial CTC result: BUT WE DIDN'T KNOW IT
shape:  (42, 41)
num indices:  41
decoded frames length:  42
words:  [76335, 46962, 56270, 117287, 88360, 120949]
Partial CTC result: MY GUESS IS USUALLY PRETTY WELL
shape:  (56, 41)
num indices:  41
decoded frames length:  56
words:  [256, 40809, 80834, 39465, 18423, 3, 87712]
Partial CTC result: ABOUT FOUR OR FIVE CENTS A POUND
shape:  (96, 41)
num indices:  41
decoded frames length:  96
words:  [112063, 112257, 47579, 113151, 89853, 56450, 54319, 3653, 56450, 120271, 3, 19632]
Partial CTC result: THAT THEY HAD TO PUT IT IN AND IT WAS A CHOICE
shape:  (40, 41)
num indices:  41
decoded frames length:  40
words:  [3, 66504, 79959, 112140, 113151, 44804, 80517]
Partial CTC result: A LOT OF THEM TO GO ON
shape:  (33, 41)
num in

decoded frames length:  51
words:  [112089, 37660, 4912, 91569, 110574, 81750]
Partial CTC result: THE FANS ARE REALLY TAKING OVER
shape:  (27, 41)
num indices:  41
decoded frames length:  27
words:  [112074, 76335, 38028, 111172]
Partial CTC result: THAT'S MY FAVORITE TEAM
shape:  (57, 41)
num indices:  41
decoded frames length:  57
words:  [83244, 79959, 56450, 56270, 3646, 111258]
Partial CTC result: PART OF IT IS ANCIENT TECHNOLOGY
shape:  (81, 41)
num indices:  41
decoded frames length:  81
words:  [112089, 80542, 48922, 3, 25195, 95606, 54319, 3, 23194]
Partial CTC result: THE ONE HAS A CRUCIAL ROLE IN A CONVERSION
shape:  (40, 41)
num indices:  41
decoded frames length:  40
words:  [76335, 82169, 103500]
Partial CTC result: MY OWN SKIN
shape:  (111, 41)
num indices:  41
decoded frames length:  111
words:  [112089, 77602, 16504, 48922, 3032, 29541, 80517, 79280, 79730]
Partial CTC result: THE NEW CAMPAIGN HAS ALWAYS DID ON NUMEROUS OCCASIONS
shape:  (23, 41)
num indices:  41
deco

decoded frames length:  33
words:  [56499, 3, 57804, 113151, 9143]
Partial CTC result: IT'S A JOY TO BEHOLD
shape:  (92, 41)
num indices:  41
decoded frames length:  92
words:  [113151, 75567, 40727, 120518, 49168, 113151, 68142, 12825, 118531, 5444, 120949]
Partial CTC result: TO MOVE FORWARD WE HAVE TO MAKE BOTH VERSIONS AS WELL
shape:  (53, 41)
num indices:  41
decoded frames length:  53
words:  [56450, 91569, 120271, 104918, 63257, 80517]
Partial CTC result: IT REALLY WAS SOMETHING LATER ON
shape:  (53, 41)
num indices:  41
decoded frames length:  53
words:  [120518, 49168, 71537, 79153]
Partial CTC result: WE HAVE MEDICAL NOW
shape:  (48, 41)
num indices:  41
decoded frames length:  48
words:  [56499, 3524, 54531, 74538]
Partial CTC result: IT'S AN INCREDIBLE MOMENT
shape:  (45, 41)
num indices:  41
decoded frames length:  45
words:  [54319, 3524, 5320]
Partial CTC result: IN AN ARSENAL
shape:  (52, 41)
num indices:  41
decoded frames length:  52
words:  [3653, 124048, 65147, 1131

decoded frames length:  27
words:  [121447, 48922, 64552, 112904]
Partial CTC result: WHICH HAS LESS TIME
shape:  (49, 41)
num indices:  41
decoded frames length:  49
words:  [76335, 51872, 56270, 39002, 122580, 123755, 1496, 123755]
Partial CTC result: MY HOME IS FILLED WITH YEAR AFTER YEAR
shape:  (28, 41)
num indices:  41
decoded frames length:  28
words:  [53551, 112326, 112493, 4912, 6604]
Partial CTC result: I THINK THOSE ARE AWFUL
shape:  (54, 41)
num indices:  41
decoded frames length:  54
words:  [3, 45219, 71421, 79959, 54476]
Partial CTC result: A GOOD MEASURE OF INCOME
shape:  (68, 41)
num indices:  41
decoded frames length:  68
words:  [120518, 4912, 40120, 80517, 63063, 4914]
Partial CTC result: WE ARE FOCUSED ON LARGE AREAS
shape:  (46, 41)
num indices:  41
decoded frames length:  46
words:  [124047, 107528, 45479, 3, 88360, 45219, 106771]
Partial CTC result: YOU STILL GOT A PRETTY GOOD START
shape:  (42, 41)
num indices:  41
decoded frames length:  42
words:  [121362, 5

decoded frames length:  30
words:  [121362, 104899, 84364]
Partial CTC result: WHAT SOME PEOPLE
shape:  (37, 41)
num indices:  41
decoded frames length:  37
words:  [121447, 56270, 118559, 21906]
Partial CTC result: WHICH IS VERY COMMON
shape:  (63, 41)
num indices:  41
decoded frames length:  63
words:  [53551, 31415, 91569, 17124, 113151, 44804, 81496, 3653, 119128]
Partial CTC result: I DON'T REALLY CARE TO GO OUT AND VISIT
shape:  (65, 41)
num indices:  41
decoded frames length:  65
words:  [56450, 28479, 80517, 112089, 27187, 79959, 112089, 21977]
Partial CTC result: IT DEPENDS ON THE DECISION OF THE COMPANY
shape:  (34, 41)
num indices:  41
decoded frames length:  34
words:  [121438, 49337, 56270, 79014]
Partial CTC result: WHETHER HE IS NOT
shape:  (22, 41)
num indices:  41
decoded frames length:  22
words:  [112074, 54319, 112201, 79153]
Partial CTC result: THAT'S IN THERE NOW
shape:  (74, 41)
num indices:  41
decoded frames length:  74
words:  [79959, 112089, 111496, 3653, 100

shape:  (50, 41)
num indices:  41
decoded frames length:  50
words:  [79153, 53919, 32143, 15865, 76335, 121901, 4345]
Partial CTC result: NOW I'M DRIVEN BY MY WIFE'S ANYWAY
shape:  (67, 41)
num indices:  41
decoded frames length:  67
words:  [112089, 45788, 40569, 40367, 85227, 81496]
Partial CTC result: THE GRAND FORKS FOR PHASING OUT
shape:  (53, 41)
num indices:  41
decoded frames length:  53
words:  [40367, 71356, 56499, 809, 3, 88360, 45219, 107860]
Partial CTC result: FOR ME IT'S ACTUALLY A PRETTY GOOD STOP
shape:  (26, 41)
num indices:  41
decoded frames length:  26
words:  [112074, 84492]
Partial CTC result: THAT'S PERFECT
shape:  (37, 41)
num indices:  41
decoded frames length:  37
words:  [121418, 49337, 44957, 113151, 99172]
Partial CTC result: WHEN HE GOES TO SCHOOL
shape:  (35, 41)
num indices:  41
decoded frames length:  35
words:  [121308, 3032, 82433, 111110]
Partial CTC result: WE'VE ALWAYS PAID TAXES
shape:  (51, 41)
num indices:  41
decoded frames length:  51
words:

decoded frames length:  44
words:  [19074, 81496, 3, 94206]
Partial CTC result: CHECK OUT A REVISION
shape:  (36, 41)
num indices:  41
decoded frames length:  36
words:  [80591, 40809, 52529, 80517, 112089, 120681]
Partial CTC result: ONLY FOUR HOURS ON THE WEEKEND
shape:  (24, 41)
num indices:  41
decoded frames length:  24
words:  [34778, 9556]
Partial CTC result: EMPLOYEE BENEFITS
shape:  (58, 41)
num indices:  41
decoded frames length:  58
words:  [44375, 112089, 84364, 79959, 29541, 79014, 123047]
Partial CTC result: GIVEN THE PEOPLE OF DID NOT WORK
shape:  (38, 41)
num indices:  41
decoded frames length:  38
words:  [29649, 85645, 79959, 123755]
Partial CTC result: DIFFERENT PIECES OF YEAR
shape:  (46, 41)
num indices:  41
decoded frames length:  46
words:  [112089, 116586, 54319, 112089, 63215, 123032]
Partial CTC result: THE UNIT IN THE LAST WORD
shape:  (57, 41)
num indices:  41
decoded frames length:  57
words:  [53551, 91569, 29553, 100258, 65147, 3, 78277]
Partial CTC resul

decoded frames length:  71
words:  [124047, 4912, 3, 45219, 80542, 113151, 110615, 256, 22945, 45288]
Partial CTC result: YOU ARE A GOOD ONE TO TALK ABOUT CONSUMER GOODS
shape:  (59, 41)
num indices:  41
decoded frames length:  59
words:  [54319, 113151, 66301, 65147, 80528, 3, 120678]
Partial CTC result: IN TO LOOK LIKE ONCE A WEEK
shape:  (36, 41)
num indices:  41
decoded frames length:  36
words:  [112074, 121713, 112089, 85478]
Partial CTC result: THAT'S WHY THE PHYSICAL
shape:  (77, 41)
num indices:  41
decoded frames length:  77
words:  [30913, 124047, 35501, 51016, 5444, 3, 106773, 80834, 5444, 3, 6961]
Partial CTC result: DO YOU ENVISION HIM AS A STARTER OR AS A BACKUP
shape:  (31, 41)
num indices:  41
decoded frames length:  31
words:  [8827, 112257, 15059, 56450]
Partial CTC result: BECAUSE THEY BUILT IT
shape:  (36, 41)
num indices:  41
decoded frames length:  36
words:  [15726, 124047, 809, 35192, 56450]
Partial CTC result: BUT YOU ACTUALLY ENJOY IT
shape:  (38, 41)
num ind

decoded frames length:  66
words:  [50546, 80542, 79959, 112089, 10500, 22112, 79959, 112063]
Partial CTC result: HE'S ONE OF THE BIG COMPONENT OF THAT
shape:  (40, 41)
num indices:  41
decoded frames length:  40
words:  [35192, 112089, 93822, 79959, 124089, 26792]
Partial CTC result: ENJOY THE REST OF YOUR DAY
shape:  (79, 41)
num indices:  41
decoded frames length:  79
words:  [54319, 3, 88223, 41602, 112904, 113151, 112904]
Partial CTC result: IN A PRESENTATION FROM TIME TO TIME
shape:  (35, 41)
num indices:  41
decoded frames length:  35
words:  [112367, 56270, 3, 88676]
Partial CTC result: THIS IS A PROBLEM
shape:  (63, 41)
num indices:  41
decoded frames length:  63
words:  [121085, 79014, 45005, 113151, 110547, 56450, 4334]
Partial CTC result: WE'RE NOT GOING TO TAKE IT ANYMORE
shape:  (29, 41)
num indices:  41
decoded frames length:  29
words:  [112367, 56270, 91569, 48518]
Partial CTC result: THIS IS REALLY HARD
shape:  (70, 41)
num indices:  41
decoded frames length:  70
word

decoded frames length:  67
words:  [75446, 4912, 1263, 113151, 6551, 112089, 4913]
Partial CTC result: MOTORISTS ARE ADVISED TO AVOID THE AREA
shape:  (75, 41)
num indices:  41
decoded frames length:  75
words:  [9189, 90795, 80517, 112089, 94745, 102619, 79959, 100610, 54319, 56450]
Partial CTC result: BEING RAISED ON THE RIGHT SIDE OF SEMEN IN IT
shape:  (72, 41)
num indices:  41
decoded frames length:  72
words:  [64582, 117238, 60650, 121362, 113151, 112326, 3653, 35192, 112089, 102324]
Partial CTC result: LET US KNOW WHAT TO THINK AND ENJOY THE SHOW
shape:  (44, 41)
num indices:  41
decoded frames length:  44
words:  [112089, 65856, 56270, 82433, 40367, 56450]
Partial CTC result: THE LOAN IS PAID FOR IT
shape:  (32, 41)
num indices:  41
decoded frames length:  32
words:  [66861, 76335, 53227, 3653, 53551]
Partial CTC result: LUCKILY MY HUSBAND AND I
shape:  (73, 41)
num indices:  41
decoded frames length:  73
words:  [120518, 49168, 3, 66504, 79959, 84364, 53551, 123047, 80517, 11

decoded frames length:  38
words:  [112089, 797, 86440, 112089, 83244]
Partial CTC result: THE ACTOR PLAYING THE PART
shape:  (51, 41)
num indices:  41
decoded frames length:  51
words:  [8630, 3, 45219, 26881, 79959, 108854]
Partial CTC result: BE A GOOD DEAL OF SUCCESS
shape:  (72, 41)
num indices:  41
decoded frames length:  72
words:  [55380, 79959, 71356, 49194, 113151, 112326, 256, 56450, 104517, 75673]
Partial CTC result: INSTEAD OF ME HAVING TO THINK ABOUT IT SO MUCH
shape:  (65, 41)
num indices:  41
decoded frames length:  65
words:  [123193, 112257, 8630, 10181, 79967, 116112, 21938]
Partial CTC result: WOULD THEY BE BETTER OFF UNDER COMMUNISM
shape:  (64, 41)
num indices:  41
decoded frames length:  64
words:  [112089, 74624, 40367, 56450, 56270, 79014, 5444, 91495]
Partial CTC result: THE MONEY FOR IT IS NOT AS READING
shape:  (23, 41)
num indices:  41
decoded frames length:  23
words:  [56499, 59852, 79959, 41957]
Partial CTC result: IT'S KIND OF FUNNY
shape:  (28, 41)
num

decoded frames length:  53
words:  [56450, 105186, 88360, 27142]
Partial CTC result: IT SOUNDS PRETTY DECENT
shape:  (28, 41)
num indices:  41
decoded frames length:  28
words:  [15726, 54319, 112089, 72889]
Partial CTC result: BUT IN THE MIDDLE
shape:  (61, 41)
num indices:  41
decoded frames length:  61
words:  [112089, 113950, 56270, 3, 91531, 86485, 113151, 65623, 113151]
Partial CTC result: THE TRACK IS A REAL PLEASURE TO LISTEN TO
shape:  (49, 41)
num indices:  41
decoded frames length:  49
words:  [54319, 3, 121668, 66504, 79959, 123776]
Partial CTC result: IN A WHOLE LOT OF YEARS
shape:  (52, 41)
num indices:  41
decoded frames length:  52
words:  [53551, 49168, 113151, 58981, 117016, 122580, 51016]
Partial CTC result: I HAVE TO KEEP UP WITH HIM
shape:  (34, 41)
num indices:  41
decoded frames length:  34
words:  [75057, 80834, 64552, 2470]
Partial CTC result: MORE OR LESS ALIKE
shape:  (72, 41)
num indices:  41
decoded frames length:  72
words:  [120518, 49168, 3, 104578, 1111

decoded frames length:  27
words:  [112257, 123193, 8630, 112063]
Partial CTC result: THEY WOULD BE THAT
shape:  (35, 41)
num indices:  41
decoded frames length:  35
words:  [53551, 98325, 51016, 54319, 3, 16983]
Partial CTC result: I SAW HIM IN A CAR
shape:  (74, 41)
num indices:  41
decoded frames length:  74
words:  [124047, 60650, 5865, 104899, 86825, 113474, 75673, 56270, 113474, 75673]
Partial CTC result: YOU KNOW AT SOME POINT TOO MUCH IS TOO MUCH
shape:  (51, 41)
num indices:  41
decoded frames length:  51
words:  [104899, 4912, 80542, 83244, 79959, 112089, 35638]
Partial CTC result: SOME ARE ONE PART OF THE EQUATION
shape:  (73, 41)
num indices:  41
decoded frames length:  73
words:  [121362, 120271, 116970, 120271, 112089, 18642, 79959, 112089, 83428]
Partial CTC result: WHAT WAS UNUSUAL WAS THE CHALLENGE OF THE PAST
shape:  (61, 41)
num indices:  41
decoded frames length:  61
words:  [121362, 30913, 124047, 35192, 75354, 256, 124089, 83244]
Partial CTC result: WHAT DO YOU EN

decoded frames length:  69
words:  [49337, 103000, 3653, 112089, 115564, 79959, 112140]
Partial CTC result: HE SIMILARLY AND THE TWO OF THEM
shape:  (25, 41)
num indices:  41
decoded frames length:  25
words:  [53551, 31415, 60650]
Partial CTC result: I DON'T KNOW
shape:  (38, 41)
num indices:  41
decoded frames length:  38
words:  [39465, 31181, 3524, 52522]
Partial CTC result: FIVE DOLLARS AN HOUR
shape:  (56, 41)
num indices:  41
decoded frames length:  56
words:  [120518, 60650, 121660, 67691, 112089, 94745, 27187]
Partial CTC result: WE KNOW WHO MADE THE RIGHT DECISION
shape:  (65, 41)
num indices:  41
decoded frames length:  65
words:  [53748, 124047, 109442, 76335, 94745, 113151, 8630, 25759]
Partial CTC result: IF YOU SUPPORT MY RIGHT TO BE CUT
shape:  (58, 41)
num indices:  41
decoded frames length:  58
words:  [124047, 49168, 113151, 30913, 3, 121668, 66504, 79959, 102071, 117016]
Partial CTC result: YOU HAVE TO DO A WHOLE LOT OF SHIT UP
shape:  (65, 41)
num indices:  41
deco

decoded frames length:  69
words:  [81414, 4912, 79014, 94980, 113151, 112089, 79726]
Partial CTC result: OTHERS ARE NOT RISING TO THE OCCASION
shape:  (58, 41)
num indices:  41
decoded frames length:  58
words:  [29541, 53551, 73931, 4330, 86825, 86836]
Partial CTC result: DID I MISS ANY POINT POINTS
shape:  (66, 41)
num indices:  41
decoded frames length:  66
words:  [56450, 58084, 91569, 21754, 31758, 113151, 84364, 80834, 84364]
Partial CTC result: IT JUST REALLY COMES DOWN TO PEOPLE OR PEOPLE
shape:  (61, 41)
num indices:  41
decoded frames length:  61
words:  [56450, 120271, 56428, 15865, 112089, 34934, 79959, 112089, 123755]
Partial CTC result: IT WAS ISSUED BY THE END OF THE YEAR
shape:  (79, 41)
num indices:  41
decoded frames length:  79
words:  [120518, 66301, 6870, 113151, 112089, 13117, 3653, 114624, 113151, 104137]
Partial CTC result: WE LOOK BACK TO THE BOY AND TRIED TO SMILE
shape:  (75, 41)
num indices:  41
decoded frames length:  75
words:  [35676, 113151, 3, 24156, 8

decoded frames length:  57
words:  [112089, 81412, 12430, 53551, 3054, 115047, 113151, 112326]
Partial CTC result: THE OTHER BOOK I AM TRYING TO THINK
shape:  (45, 41)
num indices:  41
decoded frames length:  45
words:  [112149, 80542, 78032, 120358]
Partial CTC result: THEN ONE NIGHT WATCH
shape:  (26, 41)
num indices:  41
decoded frames length:  26
words:  [124090, 45005, 113151, 49168, 113151]
Partial CTC result: YOU'RE GOING TO HAVE TO
shape:  (42, 41)
num indices:  41
decoded frames length:  42
words:  [58084, 40367, 112089, 10181]
Partial CTC result: JUST FOR THE BETTER
shape:  (62, 41)
num indices:  41
decoded frames length:  62
words:  [53551, 112326, 112367, 56270, 333, 94622]
Partial CTC result: I THINK THIS IS ABSOLUTELY RIDICULOUS
shape:  (30, 41)
num indices:  41
decoded frames length:  30
words:  [53551, 24032, 58084, 72126]
Partial CTC result: I COULD JUST MENTIONED
shape:  (67, 41)
num indices:  41
decoded frames length:  67
words:  [120518, 49168, 45479, 256, 114024, 1

decoded frames length:  66
words:  [56499, 3, 56499, 3, 65711, 85642, 79959, 62705, 81750, 112201]
Partial CTC result: IT'S A IT'S A LITTLE PIECE OF LAND OVER THERE
shape:  (53, 41)
num indices:  41
decoded frames length:  53
words:  [112201, 120271, 3, 82441, 54319, 50298, 49341]
Partial CTC result: THERE WAS A PAIN IN HER HEAD
shape:  (64, 41)
num indices:  41
decoded frames length:  64
words:  [53551, 31415, 60650, 53748, 53919, 68165, 4330, 100749, 80834, 79014]
Partial CTC result: I DON'T KNOW IF I'M MAKING ANY SENSE OR NOT
shape:  (53, 41)
num indices:  41
decoded frames length:  53
words:  [15726, 112089, 84364, 112063, 124090, 66309, 40367]
Partial CTC result: BUT THE PEOPLE THAT YOU'RE LOOKING FOR
shape:  (60, 41)
num indices:  41
decoded frames length:  60
words:  [56450, 58084, 28479, 80517, 121423, 124047, 44804]
Partial CTC result: IT JUST DEPENDS ON WHERE YOU GO
shape:  (78, 41)
num indices:  41
decoded frames length:  78
words:  [49337, 58084, 59852, 79959, 98378, 112322

In [22]:
# Echo the collected n-best results for inspection. In the visible output each
# entry is a one-element list holding a (text, float, float) tuple, and the third
# value is exactly half the second -- TODO confirm what the two scores represent.
nbest_outputs

[[("I'M ORIGINALLY FROM COLORADO", -101.41801834106445, -50.70900917053223)],
 [('I HAD BEEN ABLE TO MAKE A LOAN',
   -205.97645568847656,
   -102.98822784423828)],
 [('I HAD NO LUCK', -100.05517959594727, -50.02758979797363)],
 [('I THOUGHT THE TOPIC WAS BORING', -126.98027801513672, -63.49013900756836)],
 [('WHATEVER DRIVE UP THERE', -114.25894546508789, -57.129472732543945)],
 [("YOU DON'T GET INVOLVED", -89.41394805908203, -44.706974029541016)],
 [('HOPE YOU ENJOY THEM', -94.01966094970703, -47.009830474853516)],
 [("WE DON'T DO ENOUGH", -63.04325866699219, -31.521629333496094)],
 [('A VOICE FROM THE CROWD', -80.50192260742188, -40.25096130371094)],
 [('I HOPE YOU ENJOY MY BLOG', -91.48125457763672, -45.74062728881836)],
 [('I NEVER HAVE', -87.22953796386719, -43.614768981933594)],
 [('MAYBE EVEN TOO FAR', -76.24846839904785, -38.124234199523926)],
 [("IT'S AN ATTENTION FOR QUITE SOME TIME",
   -182.03670501708984,
   -91.01835250854492)],
 [('SKIRT AND BLOUSE OR DRESS', -150.10784

In [23]:
# rnn_outputs["trueSeqs"][-1]

In [24]:
# print(dir(SymbolTable))

In [25]:
# help(get_linear_symbol_sequence)

In [26]:
# help(SymbolTable.find_symbol)

In [27]:
# help(SymbolTable.find_index)

In [28]:
# print(dir(ngramDecoder.decoder.decoder))

In [29]:
# help(ngramDecoder.decoder.decoder.get_lattice)

In [30]:
# ngramDecoder.decoder.decoder.get_lattice(True)

In [31]:
# from kaldi.fstext import LatticeVectorFst

In [32]:
# ngramDecoder.decoder.decoder.advance_decoding(nma)

In [33]:
# print(dir(LatticeVectorFst))

In [34]:
# print(dir(DecodableTensorScaled))

In [35]:
# print(dir(LatticeFasterOnlineDecoder))