In [82]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

from eval import NMSLTestDataset, Classifier

import pandas as pd
import os

In [83]:
# load LM
df_LM = pd.read_csv("./bigram.csv")
LM = {row["word"].upper(): row["prob"] for i, row in df_LM.iterrows()}

# load dict
df_dict = pd.read_csv("./unigram_freq.csv")
dictionary = df_dict["word"][:500]
dictionary = set(dictionary)
dictionary.add("dog")
dictionary.add("apple")
dictionary.add("shit")
dictionary.add("fuck")

matrix = []
dict_list = []
for i in dictionary:
    vector = [0 for _ in range(26)]
    for c in i:
        vector[ord(c) - ord("a")] += 1
    matrix.append(vector)
    dict_list.append(i)
matrix_tensor = torch.tensor(matrix)

In [84]:
test_set = NMSLTestDataset("../dataset/test/", "dog.txt")
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

model = Classifier()
model.load_state_dict(torch.load("./models/model_unnorm.pth"))
model.eval()

Classifier(
  (cnn): Sequential(
    (0): Conv1d(8, 10, kernel_size=(3,), stride=(1,))
    (1): Flatten(start_dim=1, end_dim=-1)
  )
  (fc): Sequential(
    (0): Linear(in_features=30, out_features=27, bias=True)
  )
)

In [85]:
score = torch.tensor([[0 for i in range(27)] for j in range(5)])

In [86]:
torch.zeros((27))

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.])

In [87]:
def compare(last_topN, topN, N):
    last_vector = torch.zeros((27))
    for i in last_topN:
        last_vector[i] = 1
    
    this_vector = torch.zeros((27))
    for i in topN:
        this_vector[i] = 1
    
    # print(last_vector, this_vector)
    
    return torch.sum(torch.mul(last_vector, this_vector))

In [88]:
def sim(matrix, word):
    word_vector = [0 for i in range(26)]
    for i in word:
        word_vector[ord(i) - ord("A")] += 1
    word_vector = torch.tensor(word_vector)
    
    score = []
    for v in matrix:
        score.append(torch.sum(torch.mul(v, word_vector)) / max(torch.sum(v), len(word)))
    
    score = torch.tensor(score)
    return torch.argsort(score, descending=True)[:20]

In [89]:
import numpy as np

def DTW(recog, ref):
    table = np.zeros((len(recog), len(ref)))
    
    table[0, 0] = int(recog[0] != ref[0])
    # init the first row
    for i in range(1, table.shape[0]):
        table[i, 0] = int(recog[i] != ref[0]) + table[i-1, 0]
        
    # init the first col
    for i in range(1, table.shape[1]):
        table[0, i] = int(recog[0] != ref[i]) + table[0, i-1]
        
    for i in range(1, table.shape[0]):
        for j in range(1, table.shape[1]):
            table[i, j] = min(table[i-1, j] + 1,
                              table[i, j-1] + 1,
                              table[i-1, j-1] + int(recog[i] != ref[j]))
    
    # print(table)
    return table[-1, -1]

In [90]:
DTW("aplee", "apple")

2.0

In [91]:
cnt = 0
N = 3

count = 0

last_topN = []
last_topN_prob = []
table = {}

avg_score = 0

for batch in test_loader:
    inputs, _ = batch
    
    output_res = model.fc[0](model.cnn[0](inputs).view(-1))
    # output_res = model(inputs)
    score[cnt % 5] = output_res
    
    cnt += 1
    if cnt < 5:
        continue
    
    # avg_score = (torch.sum(score, dim = 0) / 5) + avg_score / 2
    avg_score = F.softmax(output_res)
    
    topN = torch.argsort(avg_score, descending = True)[:N]
    topN_prob = torch.sort(avg_score, descending = True)[0][:N]
    
    match = compare(last_topN, topN, N)
    # print(topN, match)
    
    # print(match)
    
    if match >= N:
        count += 1
    elif count > 5:
        print(count)
        line = ""
        for i in last_topN:
            line += chr(ord("A") + i) + "\t"
            
        if table == {}:
            for i, c in enumerate(last_topN):
                if c == 26:
                    char = " "
                else:
                    char = chr(ord("A") + c)
                table[char] = LM[f" {char}"] * last_topN_prob[i]
        else:
            tmp_table = {}
            for i, c in enumerate(last_topN):
                if c == 26:
                    # char = " "
                    continue
                else:
                    char = chr(ord("A") + c)
                this_seq = ""
                this_prob = -1
                
                for s, p in table.items():
                        # print(this_seq, s, p)
                    if p * LM[s[-1] + char] * last_topN_prob[i] > this_prob:
                        this_seq = s + char
                        this_prob = p * LM[s[-1] + char] * last_topN_prob[i]

                    # print(char, s, this_seq, this_prob)
                tmp_table[this_seq] = this_prob
            
            table = tmp_table
        
        print(line)
        count = 0
    else:
        count = 0
        
        

    last_topN = topN
    last_topN_prob = topN_prob
    
    # print(match)
    
    # line = ""
    # for i in topN:
    #     line += chr(ord("A") + i) + "\t"
    
    # print(line)
    
    

18
[	J	M	
12
D	Z	H	
10
T	Z	D	
9
O	S	E	
16
F	G	J	


In [92]:
output_res

tensor([-26971.9102,  -2471.3215,   7241.4780, -21668.5117, -39232.0859,
         11284.2695,  32233.6465,  -4176.1074,  11352.4482,  57395.9648,
         26173.9180,   9954.7021,  55054.1172,   5895.0137, -10717.1680,
          7016.1040, -39109.5820, -25625.5762, -26987.9492, -37922.9531,
        -30904.9141, -15302.6719,   1508.2080,   7213.1768,   9834.4863,
         13818.2129,  51524.5117], grad_fn=<AddBackward0>)

In [93]:
table

{'MHTOF': tensor(59655624., grad_fn=<MulBackward0>),
 'MDDEG': tensor(18099652., grad_fn=<MulBackward0>),
 'MHTOJ': tensor(1492542.3750, grad_fn=<MulBackward0>)}

In [94]:
word = list(table.keys())[0]
# word = "APLE"
tops = sim(matrix_tensor, word)

scores = {}
for i in tops:
    scores[dict_list[i]] = (len(dict_list[i]) - 
                            DTW(word.lower(), dict_list[i])) / len(dict_list[i])

max_pair = ("", -1)

for key, val in scores.items():
    if val > max_pair[1]:
        max_pair = (key, val)

print(max_pair)
print(scores)

('photos', 0.5)
{'photo': 0.4, 'photos': 0.5, 'them': 0.0, 'total': 0.2, 'most': 0.0, 'that': 0.0, 'home': 0.0, 'hot': 0.0, 'both': 0.0, 'forum': 0.0, 'off': -0.3333333333333333, 'from': 0.0, 'too': 0.0, 'hotel': 0.2, 'form': -0.25, 'room': 0.0, 'other': 0.0, 'yahoo': 0.2, 'food': 0.0, 'south': 0.0}


In [95]:
DTW("results", "APRLLS")

7.0

In [96]:
output_res = model.fc[0](model.cnn[0](inputs).view(-1))

In [97]:
score[0] + output_res

tensor([-53942.9102,  -4942.3213,  14482.4785, -43336.5117, -78464.0859,
         22568.2695,  64466.6484,  -8352.1074,  22704.4492, 114790.9688,
         52346.9180,  19908.7031, 110108.1172,  11790.0137, -21434.1680,
         14032.1035, -78218.5781, -51250.5781, -53974.9492, -75844.9531,
        -61808.9141, -30604.6719,   3016.2080,  14426.1768,  19668.4863,
         27636.2129, 103048.5156], grad_fn=<AddBackward0>)

In [98]:
torch.argsort(avg_score, descending=True)

tensor([ 9, 12, 26,  6, 10, 25,  8,  5, 11, 24,  2, 23, 15, 13, 22,  1,  7, 14,
        21,  3, 17,  0, 18, 20, 19, 16,  4])

In [99]:
chr(ord("A") + 11)

'L'

In [100]:
output_res

tensor([-26971.9102,  -2471.3215,   7241.4780, -21668.5117, -39232.0859,
         11284.2695,  32233.6465,  -4176.1074,  11352.4482,  57395.9648,
         26173.9180,   9954.7021,  55054.1172,   5895.0137, -10717.1680,
          7016.1040, -39109.5820, -25625.5762, -26987.9492, -37922.9531,
        -30904.9141, -15302.6719,   1508.2080,   7213.1768,   9834.4863,
         13818.2129,  51524.5117], grad_fn=<AddBackward0>)

In [101]:
model.cnn[1].weight

AttributeError: 'Flatten' object has no attribute 'weight'

In [None]:
256

00000100 00000010 00010101