In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import numpy as np
import collections
from nltk.stem import PorterStemmer 

In [2]:
word_to_index = {}
embedding_list = []
with open("glove.6B.100d.txt", 'r', encoding="utf-8") as f:
    for line in f:
        values = line.split()
        word = values[0]
        vector = torch.FloatTensor(np.asarray(values[1:], "float32"))
        word_to_index[word] = vector

In [3]:

file_utterance = open("hw2_utterance_dev.txt", 'r')
utterances_dev = []
for line in file_utterance.readlines():
    utterances_dev.append(line[:-1])
file_tag = open("hw2_tags_dev.txt", "r")
tags_dev = []
for line in file_tag.readlines():
    tags_dev.append(line[:-1])

data = pd.read_csv('hw2_train.csv')
utterances_train = data['utterances']
word_to_index['pad'] = torch.FloatTensor([0]*100)
#l = [len(u.split(" ")) for u in utterances_train]
#10
tags_train = data['IOB Slot tags']
tag_counter = collections.defaultdict(int)
for tag in tags_train:
    for t in tag.split():
        tag_counter[t] += 1
        
tag_to_index = {'pad' : 0}
for k,v in sorted(tag_counter.items(), reverse=True, key=lambda kv:(kv[1], kv[0])):
    tag_to_index[k] = len(tag_to_index)

index_to_tag = {}
for k,v in tag_to_index.items():
    index_to_tag[v] = k

In [4]:
ps = PorterStemmer()
def prepare(sen):
    for i in range(len(sen)):
        sen[i] = ps.stem(sen[i])
    if sen[0] in word_to_index:
        res = torch.FloatTensor(word_to_index[sen[0]])
    else:
        res = torch.FloatTensor([-1]*100)
    for word in sen[1:]:
        if word not in word_to_index:
            res = torch.cat((res, torch.FloatTensor([-1]*100)), dim=0)
            continue
        res = torch.cat((res, word_to_index[word]), dim=0)
    return res.cuda()

def prepare_tag(tags):
    res = []
    for tag in tags:
        res.append(tag_to_index[tag])
    res = torch.LongTensor(res)
    return res.cuda()

In [20]:
class LSTMTagger(nn.Module):


    def __init__(self, embedding_dim, hidden_dim, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim*2, tagset_size)

    def forward(self, sentence):
        embeds = prepare(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [21]:

model = LSTMTagger(100, 200, len(tag_to_index)).cuda()

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
for epoch in range(30):
    total_loss = 0
    for i in range(len(data)):
        model.zero_grad()
        
        utt_original = utterances_train[i].split(' ')
        tag_original = tags_train[i].split(' ')
        if len(utt_original) != len(tag_original):
            for i in range(len(utt_original)):
                if utt_original[i][0] == "'":
                    temp = utt_original[i-1] + utt_original[i]
                    utt_original = utt_original[:i-1] + [temp] + utt_original[i+1:]
                    break
        if len(utt_original) != len(tag_original):
            print(utt_original)
            print(tag_original)
            continue
        utt = []
        tag = []
        
        while True:
            if len(utt_original) == 10:
                utt.append(utt_original)
                tag.append(tag_original)
                break
            elif len(utt_original) < 10:
                while len(utt_original) != 10:
                    utt_original.append('pad')
                    tag_original.append('pad')
                utt.append(utt_original)
                tag.append(tag_original)
                break
            else:
                utt.append(utt_original[:10])
                utt_original = utt_original[10:]
                tag.append(tag_original[:10])
                tag_original = tag_original[10:]
        for j in range(len(utt)):
            predicted_tag = model(utt[j])
            target = prepare_tag(tag[j])
            loss = loss_function(predicted_tag, target)
            loss.backward()
            optimizer.step()
            total_loss += loss
    
    print("epoch=", epoch, "loss=", total_loss/len(data))
        

RuntimeError: bool value of Tensor with more than one value is ambiguous

In [15]:
def detect(output):
    res = []
    for line in output:
        line = list(line)
        res.append(index_to_tag[line.index(max(line))])
    
    return res
print(utt[j])
print(detect(model(utt[j])))

['can', 'i', 'see', 'what', 'the', 'lion', 'king', "'s", 'revenu', 'wa']
['O', 'O', 'O', 'O', 'B_movie', 'I_movie', 'I_movie', 'O', 'O', 'O']


In [18]:
file_test = open('hw2_utterance_test.txt', 'r')
stopword_set  = set(['what', 'can', 'i', 'who', 'I'])
file_token = open('hw2_tokens_test.txt', 'r')
test_token = []
for line in file_token.readlines():
    test_token.append(line[:-1])

test = []
for line in file_test.readlines():
    test.append(line[:-1])
output = []
with torch.no_grad():
    for i in range(len(test)):
        utt_original = test[i].split(' ')
        tag_original = test_token[i].split(' ')
        if len(utt_original) != len(tag_original):
            for i in range(len(utt_original)):
                if utt_original[i][0] == "'":
                    temp = utt_original[i-1] + utt_original[i]
                    utt_original = utt_original[:i-1] + [temp] + utt_original[i+1:]
                    break
        utt = []
        tag = []
        while True:
            if len(utt_original) == 10:
                utt.append(utt_original)
                tag.append(tag_original)
                break
            elif len(utt_original) < 10:
                while len(utt_original) != 10:
                    utt_original.append('pad')
                    tag_original.append('pad')
                utt.append(utt_original)
                tag.append(tag_original)
                break
            else:
                utt.append(utt_original[:10])
                utt_original = utt_original[10:]
                tag.append(tag_original[:10])
                tag_original = tag_original[10:]
                
        for j in range(len(utt)):
            padding_number = 0
            for index in range(len(utt[j])-1, -1, -1):
                if utt[j][index] == 'pad':
                    padding_number += 1
            predicted_tag = model(utt[j])    
            temp = detect(predicted_tag)[:(10 - padding_number)]
            output.append(temp)
            print(utt[j])
            print(temp)
#print(output)

['find', 'out', 'what', 'languag', 'the', 'father', 'of', 'my', 'children', 'is']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie', 'O']
['in', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O']
['search', 'for', 'zombi', 'movi', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_subject', 'O']
['summari', 'of', 'star', 'war', 'four', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_movie', 'I_movie', 'I_movie']
['spain', 'ha', 'how', 'mani', 'movi', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O']
['who', 'star', 'in', 'hous', 'at', 'the', 'end', 'of', 'the', 'street']
['O', 'O', 'O', 'B_movie', 'I_movie', 'I_movie', 'I_movie', 'I_movie', 'I_movie', 'I_movie']
['i', 'want', 'to', 'see', 'a', 'movi', 'in', 'french', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_language']
['steven', 'spielberg', 'summari', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['B_director', 'I_person', 'O']
['i', 'need', 'to', 'see', 'all', 'the', 'detail', 

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['movi', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O']
['i', 'need', 'detail', 'about', 'the', 'movi', 'star', 'wars:episod', 'four', '-a']
['O', 'O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie', 'I_movie', 'O']
['new', 'hope', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['I_movie', 'I_movie']
['i', 'would', 'like', 'to', 'know', 'the', 'releas', 'date', 'of', 'the']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['movi', 'avatar', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'B_movie']
['find', 'director', 'of', 'find', 'nemo', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'B_movie', 'I_movie']
['what', 'movi', 'did', 'joe', 'coen', 'do', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'I_director', 'O']
['who', 'produc', 'avatar', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_movie']
['i', 'want', 'movi', 'in', 'french', 'pad', 'pad', 'pad', 'pad', 'pad']
['O

['street', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['I_movie']
['when', 'wa', 'men', 'in', 'black', 'releas', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_movie', 'I_movie', 'I_movie', 'O']
['what', 'did', 'the', 'movi', 'the', 'hous', 'at', 'the', 'end', 'of']
['O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie', 'I_movie', 'I_movie', 'I_movie']
['the', 'steet', 'gross', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['I_movie', 'I_movie', 'O']
["i'd", 'love', 'to', 'watch', 'an', 'albert', 'ruddi', 'movi', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_producer', 'I_person', 'O']
['can', 'you', 'find', 'out', 'what', 'rate', 'the', 'movi', 'greas', 'is']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_movie', 'O']
['show', 'all', 'movi', 'that', 'penni', 'marshal', 'direct', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_director', 'O', 'O']
["who'", 'the', 'director', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O']
['what', 'wa', 'the', 'last', 'movi', 'made',

['give', 'me', 'the', 'genr', 'of', 'the', 'movi', 'avatar', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_movie']
['movi', 'in', 'french', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_language']
['when', 'the', 'movi', 'the', 'godfath', 'wa', 'releas', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'I_movie', 'O', 'O']
['movi', 'inform', 'for', 'life', 'is', 'beauti', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'B_movie', 'I_movie', 'I_movie']
['languag', 'of', 'spanglish', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_movie']
['movi', 'and', 'gross', 'payment', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['B_director', 'O', 'O', 'O']
['about', 'life', 'is', 'beauti', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'B_movie', 'I_movie', 'I_movie']
['show', 'me', 'movi', 'credit', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O']
['show', 'me', 'a', 'list', 'of', 'all', 'movi', 'produc', 'by', 'alfr']
['O', 'O', 'O', 'O', 'O', 'O', 'O', '

['show', 'me', 'the', 'film', 'that', 'gross', 'seven', 'point', 'two', 'million']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I_movie', 'O']
['at', 'the', 'box', 'offic', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['I_movie', 'I_movie', 'O', 'O']
['show', 'me', 'movi', 'by', 'paramount', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_producer']
['how', 'much', 'revenu', 'did', 'top', 'gun', 'make', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_movie', 'I_movie', 'O']
['can', 'you', 'find', 'movi', 'about', 'preschool', 'boy', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_mpaa_rating', 'I_movie']
['new', 'releas', 'video', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'B_producer', 'O']
['which', 'movi', 'did', 'bruce', 'willi', 'star', 'in', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'B_cast', 'I_cast', 'O', 'O']
['review', 'for', 'find', 'nemo', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_movie', 'I_movie']
['find', 'movi', 'produc', 'by', 'mgm', 'pad', 'pa

['O', 'O', 'O', 'O', 'B_director', 'I_director', 'O']
['who', 'play', 'hawkey', 'in', 'mash', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_char', 'O', 'B_movie']
['i', 'would', 'like', 'to', 'know', 'in', 'detail', 'about', 'the', 'movi']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['hous', 'at', 'the', 'end', 'of', 'the', 'street', 'pad', 'pad', 'pad']
['B_movie', 'I_movie', 'I_movie', 'I_movie', 'I_movie', 'I_movie', 'I_movie']
['i', 'want', 'tom', 'cruis', 'movi', 'pleas', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_person', 'I_person', 'O', 'O']
['what', 'wa', 'the', 'revenu', 'of', 'pretti', 'woman', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie']
['spanish', 'movi', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['B_language', 'O']
['search', 'by', 'actor', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O']
['show', 'me', 'movi', 'rate', 'r', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_mpaa_rating']
['what', 'is'

['O', 'O', 'O', 'B_movie', 'I_movie']
['find', 'me', 'movi', 'from', 'the', 'uk', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_country']
['what', 'wa', 'the', 'budget', 'for', 'the', 'avatar', 'movi', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'B_movie', 'O']
['what', 'kind', 'of', 'movi', 'is', 'parenthood', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_movie']
['list', 'movi', 'with', 'bruce', 'willi', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'B_cast', 'I_cast']
['show', 'me', 'movi', 'with', 'g', 'rate', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_mpaa_rating', 'O']
['dirti', 'danc', 'actor', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O']
['will', 'ferrel', 'movi', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['B_person', 'I_person', 'O']
['korean', 'movi', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['B_country', 'O']
['how', 'much', 'money', 'did', 'mr', 'and', 'mr', 'smith', 'make', 'pad']
['O', 'O', 'O'

['display', 'albert', 'brook', 'movi', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'B_person', 'I_person', 'O']
['is', 'nine', 'fourteen', 'the', 'premier', 'date', 'for', 'resid', 'evil', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie']
['who', 'are', 'cast', 'and', 'crew', 'of', 'dirti', 'danc', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie']
['when', 'did', 'the', 'hous', 'at', 'the', 'end', 'of', 'the', 'street']
['O', 'O', 'O', 'B_movie', 'I_movie', 'I_movie', 'I_movie', 'I_movie', 'I_movie', 'I_movie']
['hit', 'theater', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O']
['show', 'me', 'movi', 'with', 'tom', 'hank', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_cast', 'I_cast']
['the', 'godfath', 'wa', 'produc', 'by', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O']
['show', 'me', 'what', 'genr', 'doe', 'movi', 'lawless', 'belong', 'to', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_producer', 'B_movie', 'O', 'O']
['w

['what', 'movi', 'have', 'been', 'direct', 'by', 'clint', 'eastwood', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'B_director', 'I_director']
['get', 'horror', 'film', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'B_genre', 'O']
['what', 'is', 'a', 'british', 'horror', 'movi', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_genre', 'O']
['what', 'languag', 'is', 'the', 'movi', 'hero', 'in', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie']
['video', 'relat', 'to', 'steven', 'spielberg', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'B_director', 'I_director']
['review', 'for', 'find', 'nemo', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_movie', 'I_movie']
['best', 'director', 'of', 'all', 'time', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O']
['what', 'are', 'some', 'zombi', 'apocalyps', 'movi', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'B_subject', 'O', 'O']
['find', 'a', 'g', 'rate', 'movi', 'pad', 'pad', 'pad', 'pad',

['show', 'me', 'movi', 'produc', 'by', 'bruce', 'lee', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_producer', 'I_producer']
['movi', 'by', 'stephen', 'spielberg', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_director', 'I_director']
['show', 'me', 'nc', 'minu', 'seventeen', 'rate', 'movi', 'pad', 'pad', 'pad']
['O', 'O', 'B_mpaa_rating', 'I_mpaa_rating', 'I_mpaa_rating', 'O', 'O']
['i', 'want', 'to', 'see', 'imag', 'of', 'will', 'ferrel', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'B_person', 'I_person']
['where', 'wa', 'the', 'milk', 'of', 'sorrow', 'made', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'I_movie', 'I_movie', 'I_movie', 'O']
['who', 'is', 'in', 'vulguria', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O']
['how', 'much', 'did', 'it', 'cost', 'to', 'creat', 'the', 'boardwalk', 'empir']
['O', 'O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie', 'I_movie', 'O']
['hard', 'day', "'s", 'night', 'richard', 'lester', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 

['who', 'direct', 'field', 'of', 'dream', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_movie', 'I_movie', 'I_movie']
['titan', 'director', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['B_movie', 'O']
['show', 'me', 'onli', 'movi', 'with', 'a', 'rate', 'of', 'r', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_mpaa_rating']
['what', 'are', 'the', 'top', 'five', 'french', 'movi', 'of', 'all', 'time']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I_movie']
['what', 'did', 'brad', 'pitt', 'porduc', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_person', 'I_person', 'O']
['what', 'movi', 'wa', 'angelina', 'a', 'main', 'actress', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'B_movie', 'O', 'O', 'O']
['what', 'us', 'releas', 'movi', 'have', 'been', 'film', 'in', 'the', 'czech']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_country']
['republ', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O']
['search', 'for', 'inform', 'about', 'the', 'cast', 'and', 'crew'

In [19]:
s = 0
i = 0
with open('prediction.txt', 'w') as f:
    for line in output:
        f.write(' '.join(line))
        f.write(' ')
        s += len(line)
        print(s)
        if s == len(test_token[i].split(" ")):
            f.write('\n')
            s = 0
            i += 1

10
11
4
5
5
10
8
3
10
12
7
6
4
3
4
7
6
6
10
5
5
3
5
6
4
10
11
5
10
18
5
8
6
8
4
8
8
6
7
7
6
6
2
10
15
5
8
7
9
5
3
7
5
9
6
6
9
10
12
9
2
3
9
3
5
8
10
16
10
11
10
12
10
12
5
6
3
5
10
17
5
6
5
2
6
5
6
3
6
7
8
10
3
3
4
3
3
7
4
7
8
7
6
10
19
9
7
10
11
9
5
4
3
5
8
8
10
8
6
5
7
10
9
6
2
1
3
4
10
6
9
4
7
10
16
4
7
10
11
6
10
13
8
10
7
3
9
10
16
7
7
9
2
3
4
6
10
13
5
6
7
3
7
9
6
6
7
10
3
9
9
4
9
7
8
6
5
7
7
7
5
9
5
5
6
6
4
5
5
6
8
10
11
7
9
4
7
6
4
7
9
10
4
7
10
8
3
7
6
3
4
4
4
10
11
8
10
11
7
7
4
5
4
7
9
5
5
9
6
6
9
9
6
3
6
5
6
3
8
8
5
5
6
5
8
7
10
2
7
6
7
7
7
10
13
3
8
8
6
2
6
5
4
2
5
7
9
10
12
7
4
8
3
6
10
14
5
7
7
3
7
4
5
9
7
3
6
9
9
9
7
5
8
8
4
5
10
13
5
10
13
6
8
5
3
5
6
6
10
4
4
4
9
3
8
4
7
10
15
8
4
7
9
6
4
4
9
7
8
10
11
4
3
8
8
9
6
6
7
6
7
5
10
17
6
7
2
3
5
6
10
4
8
5
3
10
17
4
7
8
4
10
13
7
3
10
11
7
5
7
10
11
5
10
4
6
8
7
8
3
5
5
5
4
6
8
4
5
7
4
10
14
7
9
7
5
6
5
5
5
9
7
9
5
10
12
3
6
4
5
6
8
6
5
6
3
3
2
9
2
1
6
10
3
2
4
10
13
3
3
6
8
7
3
4
7
2
4
5
9
7
4
10
11
2
6
10
7
7
4
8
7
3
9
9


In [16]:
output = []
with torch.no_grad():
    for i in range(len(utterances_dev)):
        utt_original = utterances_dev[i].split(' ')
        tag_original = tags_dev[i].split(' ')
        if len(utt_original) != len(tag_original):
            for i in range(len(utt_original)):
                if utt_original[i][0] == "'":
                    temp = utt_original[i-1] + utt_original[i]
                    utt_original = utt_original[:i-1] + [temp] + utt_original[i+1:]
                    break
        utt = []
        tag = []
        while True:
            if len(utt_original) == 10:
                utt.append(utt_original)
                tag.append(tag_original)
                break
            elif len(utt_original) < 10:
                while len(utt_original) != 10:
                    utt_original.append('pad')
                    tag_original.append('pad')
                utt.append(utt_original)
                tag.append(tag_original)
                break
            else:
                utt.append(utt_original[:10])
                utt_original = utt_original[10:]
                tag.append(tag_original[:10])
                tag_original = tag_original[10:]
                
        for j in range(len(utt)):
            padding_number = 0
            for index in range(len(utt[j])-1, -1, -1):
                if utt[j][index] == 'pad':
                    padding_number += 1
            predicted_tag = model(utt[j])    
            temp = detect(predicted_tag)[:(10 - padding_number)]
            output.append(temp)
            print(utt[j])
            print(temp)

['show', 'me', 'one', 'by', 'david', 'fincher', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_director', 'I_person']
['who', 'is', 'director', 'of', 'the', 'word', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_movie', 'I_movie']
['what', 'rate', 'did', 'the', 'campaign', 'movi', 'get', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'B_movie', 'I_movie', 'O', 'O']
['how', 'much', 'did', 'looper', 'gross', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'B_movie', 'O']
['what', 'is', 'the', 'budget', 'for', 'epic', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_movie']
['open', 'a', 'page', 'about', 'the', 'movi', 'life', 'is', 'beauti', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie', 'I_movie']
['search', 'for', 'movi', 'about', 'danc', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_subject']
['how', 'much', 'wa', 'made', 'from', 'platoon', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_movie']
['what', 'movi', 'did', 'stephen', 'spielber

['I_producer']
['who', 'is', 'the', 'director', 'of', 'sleepless', 'in', 'seattl', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie', 'I_movie']
['show', 'me', 'some', 'other', 'movi', 'direct', 'by', 'andrew', 'stanton', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_director', 'I_director']
['what', 'film', 'are', 'made', 'by', 'paramount', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_producer']
['what', 'movi', 'were', 'made', 'in', 'ireland', 'thi', 'year', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_country', 'O', 'O']
['i', 'need', 'to', 'find', 'out', 'about', 'name', 'of', 'some', 'japanes']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['movi', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O']
['give', 'me', 'name', 'of', 'some', 'action', 'movi', 'with', 'a', 'pg']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_mpaa_rating']
['thirteen', 'rate', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['I_mpaa_rating', 'O']
['i', 

['jame', 'cameron', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['B_director', 'I_director']
['pleas', 'tell', 'me', 'how', 'much', 'wa', 'the', 'budget', 'for', 'avatar']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_movie']
['i', 'want', 'to', 'find', 'movi', 'star', 'the', 'singer', 'mario', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B_cast', 'I_cast']
['have', 'ani', 'us', 'releas', 'movi', 'been', 'film', 'on', 'locat', 'in']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['the', 'czech', 'republ', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'B_country', 'O']
['show', 'movi', 'made', 'by', 'product', 'lionsgat', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_producer']
['what', 'day', 'will', 'django', 'unchain', 'come', 'out', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'I_person', 'O', 'O', 'O']
['list', 'movi', 'set', 'in', 'ireland', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_location']
['i', 'am', 'tri', 'to', 'find', 'a', 'li

['who', 'produc', 'avatar', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_movie']
['ethel', 'actress', 'from', 'i', 'love', 'luci', 'pad', 'pad', 'pad', 'pad']
['I_person', 'O', 'O', 'O', 'I_movie', 'I_movie']
['pleas', 'provid', 'the', 'cost', 'for', 'produc', 'the', 'deer', 'hunter', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie', 'I_movie']
['director', 'inform', 'for', 'movi', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'B_movie']
['i', 'would', 'like', 'to', 'know', 'the', 'rate', 'of', 'the', 'movi']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['avatar', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['B_movie']
['when', 'did', 'black', 'swan', 'come', 'out', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_movie', 'I_movie', 'O', 'O']
['list', 'japanes', 'movi', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'B_producer', 'O']
['i', 'want', 'to', 'watch', 'a', 'movi', 'in', 'chines', 'pad', 'pad']
['O', 'O', 'O',

['what', 'are', 'some', 'time', 'travel', 'movi', 'i', 'can', 'watch', 'pad']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['what', 'is', 'the', 'name', 'of', 'a', 'turkish', 'director', 'that', 'produc']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['a', 'drama', 'film', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'B_genre', 'O']
['show', 'me', 'western', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_genre']
['what', 'languag', 'is', 'thi', 'movi', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O']
['what', 'wa', 'the', 'budget', 'for', 'the', 'aveng', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'O', 'B_movie', 'I_movie']
['avatar', "'s", 'budget', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'O']
['bring', 'up', 'all', 'movi', 'brad', 'pitt', 'direct', 'pad', 'pad', 'pad']
['O', 'O', 'O', 'O', 'B_director', 'I_person', 'O']
['who', 'produc', 'hotel', 'rwanda', 'cover', 'art', 'pad', 'pad', 'pad', 'pad']
['O', 'O', 'B_movie', '

In [17]:
s = 0
i = 0
with open('prediction.txt', 'w') as f:
    for line in output:
        f.write(' '.join(line))
        f.write(' ')
        s += len(line)
        print(s)
        if s == len(tags_dev[i].split(" ")):
            f.write('\n')
            s = 0
            i += 1

6
6
7
5
6
9
5
6
6
2
4
8
10
11
6
7
7
9
6
5
10
12
6
4
4
9
4
8
10
9
5
4
4
7
7
8
7
3
9
5
8
7
5
7
6
3
5
10
20
22
6
3
3
3
4
4
8
5
7
6
6
5
7
3
9
6
4
10
11
8
9
6
8
10
11
10
12
8
10
5
9
6
6
8
7
9
5
4
4
2
3
3
10
12
8
7
7
8
3
5
7
6
5
4
4
8
6
6
4
4
7
7
4
4
4
6
10
13
6
8
6
8
9
7
9
5
7
10
14
10
3
6
5
7
5
8
10
12
10
9
10
13
6
7
5
10
12
6
9
5
10
14
5
4
10
12
6
7
3
7
6
4
4
5
6
10
8
6
5
5
5
8
3
6
3
6
6
4
4
5
8
6
7
7
10
11
4
3
9
8
6
5
7
6
5
4
8
5
3
9
2
9
10
16
8
7
3
6
9
4
10
11
6
3
8
9
9
5
4
8
3
3
4
3
3
6
10
11
7
3
9
3
9
5
8
4
7
7
5
6
2
8
7
7
7
7
6
5
8
10
11
5
7
3
7
4
9
10
5
6
10
15
6
10
11
6
7
4
4
7
3
5
7
4
7
2
9
10
13
3
5
7
3
7
6
9
6
6
7
5
8
6
7
7
4
5
10
9
2
10
6
5
6
6
4
7
7
3
10
11
6
6
6
6
10
12
3
9
4
