In [1]:
import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math
import voc

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Open the text files with lines and conversations

In [3]:
lines_filepath = os.path.join("cornell movie-dialogs corpus", "movie_lines.txt")
conv_filepath = os.path.join("cornell movie-dialogs corpus", "movie_conversations.txt")

In [4]:
corpus_name = "cornell movie-dialogs corpus"
corpus = os.path.join("data", corpus_name)

def printLines(file, n=10):
    with open(file, 'rb') as datafile:
        lines = datafile.readlines()
    for line in lines[:n]:
        print(line)

In [52]:
save_dir = os.path.join("data", "save")

In [5]:
with open(lines_filepath,'r') as file:
    lines = file.readlines()

for line in lines[:8]:
    print(line.strip())

L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!
L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!
L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.
L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?
L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go.
L924 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ Wow
L872 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Okay -- you're gonna need to learn how to lie.
L871 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ No


In [6]:
line_fields = ['lineID', 'characterID', 'movieID', 'character', 'text']

lines = {}

with open(lines_filepath, 'r', encoding = 'iso-8859-1') as f:
    for line in f:
        values = line.split(' +++$+++ ')
        lineObj = {}
        for i, field in enumerate(line_fields):
            lineObj[field] = values[i]
        lines[lineObj['lineID']] = lineObj

In [7]:
lines['L984']

{'lineID': 'L984',
 'characterID': 'u2',
 'movieID': 'm0',
 'character': 'CAMERON',
 'text': 'She okay?\n'}

# Convert the conversation to a dictionary with fields

In [8]:
conv_fields = ['character1ID', 'character2ID', 'movieID', 'utteranceIDs']
conversations = []

with open(conv_filepath, 'r', encoding = 'iso-8859-1') as f:
    for line in f:
        values = line.split(' +++$+++ ')
        convObj = {}
        for i, field in enumerate(conv_fields):
            convObj[field] = values[i]
        lineIds = eval(convObj['utteranceIDs'])
        convObj['lines'] = []
        for lineId in lineIds:
            convObj['lines'].append(lines[lineId])
        conversations.append(convObj)

In [9]:
print(conversations[0]['lines'][0]['text'],'\n',conversations[0]['lines'][1]['text'])

Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.
 
 Well, I thought we'd start with pronunciation, if that's okay with you.



In [10]:
qa_pairs = []
for conversation in conversations:
    for i in range(len(conversation['lines'])-1):
        inputLine = conversation['lines'][i]['text'].strip()
        targetLine = conversation['lines'][i+1]['text'].strip()
        if inputLine and targetLine:
            qa_pairs.append([inputLine, targetLine])

In [11]:
qa_pairs[0]

['Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.',
 "Well, I thought we'd start with pronunciation, if that's okay with you."]

# Save the file with conversation pairs in a text file

In [12]:
datafile = os.path.join('cornell movie-dialogs corpus','formatted_movie_lines.txt')
delimiter = '\t'
delimiter = str(codecs.decode(delimiter, 'unicode_escape'))

print('\nWriting newly formatted file...')
with open(datafile, 'w', encoding='utf-8') as outputfile:
    writer = csv.writer(outputfile, delimiter=delimiter)
    for pair in qa_pairs:
        writer.writerow(pair)
print('Done writing to file')


Writing newly formatted file...
Done writing to file


# Open the text file with conversation pairs

In [13]:
datafile = os.path.join('cornell movie-dialogs corpus','formatted_movie_lines.txt')

with open(datafile, 'rb') as file:
    lines = file.readlines()

for line in lines[12:20]:
    print(line)

b"I don't want to know how to say that though.  I want to know useful things. Like where the good stores are.  How much does champagne cost?  Stuff like Chat.  I have never in my life had to point out my head to someone.\tThat's because it's such a nice one.\r\r\n"
b"That's because it's such a nice one.\tForget French.\r\r\n"
b"How is our little Find the Wench A Date plan progressing?\tWell, there's someone I think might be --\r\r\n"
b'There.\tWhere?\r\r\n'
b"You got something on your mind?\tI counted on you to help my cause. You and that thug are obviously failing. Aren't we ever going on our date?\r\r\n"
b"You have my word.  As a gentleman\tYou're sweet.\r\r\n"
b"How do you get your hair to look like that?\tEber's Deep Conditioner every two days. And I never, ever use a blowdryer without the diffuser attachment.\r\r\n"
b"Sure have.\tI really, really, really wanna go, but I can't.  Not unless my sister goes.\r\r\n"


# Create a vocabulary class 
### To create a clas which creates word count, indexes the word. Also a function to delete rare which do not repeat  more than a minimum threshold.

In [14]:
PAD_token = 0  # Use padding for short sentences
SOS_token = 1  # Start of sentence
EOS_token = 2  # End of sentence

class Vocabulary:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: 'PAD', SOS_token: 'SOS', EOS_token: 'EOS'}
        self.num_words = 3  #count PAD, SOS, EOS
        
    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)
            
    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words+=1
        else:
            self.word2count[word] = self.word2count[word] + 1
 
    def trim(self, min_count):
        keep_words = []
        for k,v in self.word2count.items():
            if v >= min_count:
                keep_words.append(k)
                
        print('keep_words {} / {} = {:.4f}'.format(len(keep_words), len(self.word2index),
                                                   len(keep_words)/len(self.word2index)))
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: 'PAD', SOS_token: 'SOS', EOS_token: 'EOS'}
        self.num_words = 3
        
        for word in keep_words:
            self.addWord(word)

# Function to remove annotations from alphabets
### Normal Form Decompose - decompose to normal form - comprehension

In [15]:
def unicodeToAscii(s):
    return ''.join(c for c in unicodedata.normalize('NFD',s) if unicodedata.category(c)!='Mn')

# Function - Regular expression
### 1. To lower the case of string, (r is added to escape backslash)
### 2. To remove any character that is not a sequqnce of lower or upper case letters 
### 3. To remove a sequence of white space characters

In [16]:
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])",r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    s = re.sub(r"\s+",r" ",s).strip()
    return s

In [17]:
normalizeString("aa123aa!s's dd?\n \t")

'aa aa !s s dd ?'

# Opening the text file
### Each line of file contains conversation separated by \t
### Open it as lines
### Split it into pairs

In [18]:
datafile = os.path.join('cornell movie-dialogs corpus','formatted_movie_lines.txt')

print("Reading and processing file .. Please wait")

lines = open(datafile, encoding='utf-8').read().strip().split('\n')

pairs = [[normalizeString(s) for s in pair.split('\t')] for pair in lines]

Reading and processing file .. Please wait


In [19]:
pairs[6], pairs[6][0],pairs[6][1]

(['you re asking me out . that s so cute . what s your name again ?',
  'forget it .'],
 'you re asking me out . that s so cute . what s your name again ?',
 'forget it .')

In [20]:
lines[6]

"You're asking me out.  That's so cute. What's your name again?\tForget it."

# Filter pair to a set maximum length

In [21]:
MAX_LENGTH = 10

def filterPair(p):
    return len(p[0].split()) < MAX_LENGTH and len(p[1].split()) < MAX_LENGTH

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

In [22]:
pairs = [pair for pair in pairs if len(pair) > 1]

print("There are {} conversations".format(len(pairs)))

pairs = filterPairs(pairs)

print("There are {} conversations after filtering".format(len(pairs)))

There are 221282 conversations
There are 64271 conversations after filtering


# Call the class vocabulary with the pairs list

In [23]:
voc = Vocabulary(pairs)

In [24]:
voc.word2count

{}

In [25]:
for pair in pairs:
    voc.addSentence(pair[0])
    voc.addSentence(pair[1])

print('Counted words:', voc.num_words)
for pair in pairs[:10]:
    print(pair)

Counted words: 18008
['there .', 'where ?']
['you have my word . as a gentleman', 'you re sweet .']
['hi .', 'looks like things worked out tonight huh ?']
['you know chastity ?', 'i believe we share an art instructor']
['have fun tonight ?', 'tons']
['well no . . .', 'then that s all you had to say .']
['then that s all you had to say .', 'but']
['but', 'you always been this selfish ?']
['do you listen to this crap ?', 'what crap ?']
['what good stuff ?', 'the real you .']


In [26]:
voc.word2count

{'there': 2013,
 '.': 104124,
 'where': 2475,
 '?': 43942,
 'you': 29248,
 'have': 3023,
 'my': 3148,
 'word': 125,
 'as': 558,
 'a': 8579,
 'gentleman': 22,
 're': 3658,
 'sweet': 78,
 'hi': 659,
 'looks': 199,
 'like': 2173,
 'things': 249,
 'worked': 59,
 'out': 1521,
 'tonight': 278,
 'huh': 815,
 'know': 3608,
 'chastity': 2,
 'i': 22076,
 'believe': 399,
 'we': 3946,
 'share': 25,
 'an': 809,
 'art': 26,
 'instructor': 1,
 'fun': 94,
 'tons': 7,
 'well': 1898,
 'no': 5693,
 'then': 1140,
 'that': 7220,
 's': 12452,
 'all': 1774,
 'had': 489,
 'to': 8082,
 'say': 1272,
 'but': 1478,
 'always': 330,
 'been': 760,
 'this': 3362,
 'selfish': 3,
 'do': 5416,
 'listen': 204,
 'crap': 19,
 'what': 12633,
 'good': 1756,
 'stuff': 146,
 'the': 10739,
 'real': 252,
 'fear': 31,
 'of': 3309,
 'wearing': 41,
 'pastels': 1,
 'wow': 86,
 'let': 1022,
 'go': 2057,
 'she': 2057,
 'okay': 1453,
 'hope': 154,
 'so': 2259,
 'they': 2079,
 '!': 11227,
 'not': 3814,
 'did': 2600,
 'change': 85,
 'you

# Create a function to remove words which does not repeat more than 3 times 

In [27]:
MIN_COUNT = 3

def trimRareWords(voc, pairs, MIN_COUNT):
    voc.trim(MIN_COUNT)
    keep_pairs = []
    
    for pair in pairs:
        input_sentence = pair[0]
        output_sentence = pair[1]
        
        keep_input= True
        keep_output = True
        
        for word in input_sentence.split(' '):
            if word not in voc.word2index:
                keep_input = False
                break
        
        for word in output_sentence.split(' '):
            if word not in voc.word2index:
                keep_output = False
                break
        
        if keep_input and keep_output:
            keep_pairs.append(pair)
            
    print('Trimmed from {} pairs to {}, {:.4f}% of Total'.format(len(pairs), len(keep_pairs),
                                                                 len(pairs)/len(keep_pairs)))
    return keep_pairs

In [28]:
pairs = trimRareWords(voc, pairs, MIN_COUNT)

keep_words 7823 / 18005 = 0.4345
Trimmed from 64271 pairs to 53165, 1.2089% of Total


# Create an index from the sentence

In [29]:
def indexesFromSentence(voc, sentence):
    return [voc.word2index[word] for word in sentence.split(' ')]+[EOS_token]

In [30]:
inp = []
out = []

for pair in pairs[:10]:  #create a test pair for indexing
    inp.append(pair[0])
    out.append(pair[1])
    
print(inp)
print(len(inp))

indexes = [indexesFromSentence(voc, sentence) for sentence in inp]
indexes

['there .', 'you have my word . as a gentleman', 'hi .', 'have fun tonight ?', 'well no . . .', 'then that s all you had to say .', 'but', 'do you listen to this crap ?', 'what good stuff ?', 'wow']
10


[[3, 4, 2],
 [7, 8, 9, 10, 4, 11, 12, 13, 2],
 [16, 4, 2],
 [8, 31, 22, 6, 2],
 [33, 34, 4, 4, 4, 2],
 [35, 36, 37, 38, 7, 39, 40, 41, 4, 2],
 [42, 2],
 [47, 7, 48, 40, 45, 49, 6, 2],
 [50, 51, 52, 6, 2],
 [58, 2]]

In [31]:
a = [[3, 4, 2],
 [7, 8, 9, 10, 4, 11, 12, 13, 2],
 [16, 4, 2],
 [8, 31, 22, 6, 2],
 [33, 34, 4, 4, 4, 2],
 [35, 36, 37, 38, 7, 39, 40, 41, 4, 2],
 [42, 2],
 [47, 7, 48, 40, 45, 49, 6, 2],
 [50, 51, 52, 6, 2],
 [58, 2]]

### Zip_longest used to zip the indexes together and fill missing values with 0

In [32]:
list(itertools.zip_longest(*a, fillvalue=0)) #how we zip the indexes together

[(3, 7, 16, 8, 33, 35, 42, 47, 50, 58),
 (4, 8, 4, 31, 34, 36, 2, 7, 51, 2),
 (2, 9, 2, 22, 4, 37, 0, 48, 52, 0),
 (0, 10, 0, 6, 4, 38, 0, 40, 6, 0),
 (0, 4, 0, 2, 4, 7, 0, 45, 2, 0),
 (0, 11, 0, 0, 2, 39, 0, 49, 0, 0),
 (0, 12, 0, 0, 0, 40, 0, 6, 0, 0),
 (0, 13, 0, 0, 0, 41, 0, 2, 0, 0),
 (0, 2, 0, 0, 0, 4, 0, 0, 0, 0),
 (0, 0, 0, 0, 0, 2, 0, 0, 0, 0)]

In [33]:
def zeroPadding(l, fillvalue=0):
    return list(itertools.zip_longest(*l, fillvalue=fillvalue))

In [34]:
test_result = zeroPadding(indexes, fillvalue=0)

print(len(test_result))
test_result

10


[(3, 7, 16, 8, 33, 35, 42, 47, 50, 58),
 (4, 8, 4, 31, 34, 36, 2, 7, 51, 2),
 (2, 9, 2, 22, 4, 37, 0, 48, 52, 0),
 (0, 10, 0, 6, 4, 38, 0, 40, 6, 0),
 (0, 4, 0, 2, 4, 7, 0, 45, 2, 0),
 (0, 11, 0, 0, 2, 39, 0, 49, 0, 0),
 (0, 12, 0, 0, 0, 40, 0, 6, 0, 0),
 (0, 13, 0, 0, 0, 41, 0, 2, 0, 0),
 (0, 2, 0, 0, 0, 4, 0, 0, 0, 0),
 (0, 0, 0, 0, 0, 2, 0, 0, 0, 0)]

# Function to convert to binary matrix 

In [35]:
def binaryMatrix(l, value = 0):
    m = []
    for i,seq in enumerate(l):
        m.append([])
        for token in seq:
            if token == PAD_token:
                m[i].append(0)
            else:
                m[i].append(1)
    return m

In [36]:
binary_result = binaryMatrix(test_result)

binary_result

[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1, 1, 0, 1, 1, 0],
 [0, 1, 0, 1, 1, 1, 0, 1, 1, 0],
 [0, 1, 0, 1, 1, 1, 0, 1, 1, 0],
 [0, 1, 0, 0, 1, 1, 0, 1, 0, 0],
 [0, 1, 0, 0, 0, 1, 0, 1, 0, 0],
 [0, 1, 0, 0, 0, 1, 0, 1, 0, 0],
 [0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]]

# Functions for converting pairs into input and output tensors

In [37]:
def inputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    padVar = torch.LongTensor(padList)
    return padVar, lengths

In [38]:
def outputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    max_target_len = max([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    mask = binaryMatrix(padList)
    mask = torch.ByteTensor(mask)
    padVar = torch.LongTensor(padList)
    return padVar, mask, max_target_len

# Function to create training data

In [39]:
def batch2TrainData(voc, pair_batch):
    pair_batch.sort(key=lambda x:len(x[0].split(' ')), reverse=True)  #sort the 1st chat of pair in descending order number of words
    input_batch, output_batch = [], []
    for pair in pair_batch:
        input_batch.append(pair[0])
        output_batch.append(pair[1])
    inp, lengths = inputVar(input_batch, voc)
    output, mask, max_target_len = outputVar(output_batch, voc)
    return inp, lengths, output, mask, max_target_len

In [40]:
'''small_batch_size = 5
batches = batch2TrainData(voc,[random.choice(pairs) for _ in range(small_batch_size)])
input_variable, lengths, target_variable, mask, max_target_len = batches

print('input variable:', input_variable)
print('lengths', lengths)
print('target variable', target_variable)
print('mask', mask)
print('max_target_len:', max_target_len)'''

"small_batch_size = 5\nbatches = batch2TrainData(voc,[random.choice(pairs) for _ in range(small_batch_size)])\ninput_variable, lengths, target_variable, mask, max_target_len = batches\n\nprint('input variable:', input_variable)\nprint('lengths', lengths)\nprint('target variable', target_variable)\nprint('mask', mask)\nprint('max_target_len:', max_target_len)"

# Creating the model

### 1. We are using bi-directional GRU enocder

In [41]:
class EncoderRNN(nn.Module):
    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(EncoderRNN,self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout = (0 if n_layers == 1 else dropout), 
                          bidirectional = True)
    
    def forward(self, input_seq, input_lengths, hidden=None):
        embedded =  self.embedding(input_seq)
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        outputs, hidden = self.gru(packed, hidden)
        outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
        outputs = outputs[:,:,:self.hidden_size] + outputs[:,:,self.hidden_size:]
        return outputs, hidden

In [42]:
class Attn(torch.nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        self.hidden_size = hidden_size
        
    def dot_score(self, hidden, encoder_outputs):
        return torch.sum(hidden * encoder_outputs, dim =2)
    
    def forward(self, hidden, encoder_outputs):
        attn_energies = self.dot_score(hidden, encoder_outputs)
        attn_energies = attn_energies.t()
        return F.softmax(attn_energies, dim=1).unsqueeze(1)

In [43]:
class LuongAttnDecoderRNN(nn.Module):
    def __init__(self, attn_model, embedding, hidden_size, output_size, n_layers=1, dropout=0):
        super(LuongAttnDecoderRNN, self).__init__()
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout
        
        #Define layers
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, 
                          dropout=(0 if n_layers ==1 else dropout))
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.attn = Attn(attn_model, hidden_size)
    
    def forward(self, input_step, last_hidden, encoder_outputs):
        embedded = self.embedding(input_step)
        embedded = self.embedding_dropout(embedded)
        rnn_output, hidden = self.gru(embedded, last_hidden)
        attn_weights = self.attn(rnn_output, encoder_outputs)
        context = attn_weights.bmm(encoder_outputs.transpose(0,1))
        rnn_output = rnn_output.squeeze(0)
        context = context.squeeze(1)
        concat_input = torch.cat((rnn_output, context),1)
        concat_output = torch.tanh(self.concat(concat_input))
        output = self.out(concat_output)
        output = F.softmax(output, dim=1)
        return output, hidden
        

## Loss Function

In [44]:
def maskNLLLoss(decoder_out, target, mask):
    nTotal = mask.sum()  #() added to sum
    target = target.view(-1,1)
    gathered_tensor = torch.gather(decoder_out, 1, target)
    crossEntropy = -torch.log(gathered_tensor)
    loss = crossEntropy.masked_select(mask)
    loss = loss.mean()
    loss = loss.to(device)
    return loss, nTotal.item()

## Step by step visulaization

In [45]:
'''small_batch_size = 5
batches = batch2TrainData(voc,[random.choice(pairs) for _ in range(small_batch_size)])
input_variable, lengths, target_variable, mask, max_target_len = batches

print('input variable:', input_variable)
print('lengths', lengths)
print('target variable', target_variable)
print('mask', mask)
print('max_target_len:', max_target_len)

#Define parameters
hidden_size= 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
attn_model = 'dot'
embedding = nn.Embedding(voc.num_words, hidden_size)

#Define the enocder & decoder
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
encoder = encoder.to(device)
decoder = decoder.to(device)

encoder.train()
decoder.train()

encoder_optimizer = optim.Adam(encoder.parameters(), lr = 0.0001)
decoder_optimizer = optim.Adam(decoder.parameters(), lr = 0.0001)
encoder_optimizer.zero_grad()
decoder_optimizer.zero_grad()

input_variable = input_variable.to(device)
lengths = lengths.to(device)
target_variable = target_variable.to(device)
mask = mask.to(device)

loss = 0
print_losses = []
n_totals = 0

encoder_outputs, encoder_hidden = encoder(input_variable, lengths)
print("Encoder Outputs Shape:", encoder_outputs.shape)
print("Last Encoder Hidden Shape", encoder_hidden.shape)

decoder_input = torch.LongTensor([[SOS_token for _ in range(small_batch_size)]])
decoder_input = decoder_input.to(device)
print("Initial Decoder Input Shape:", decoder_input.shape)
print(decoder_input)

decoder_hidden = encoder_hidden[:decoder.n_layers]
print("Initial Decoder hidden state shape:", decoder_hidden.shape)
print("\n")
print("------------------------------------------------------------------------------")
print("Now let us look at what is happening in every timestep of the GRU!")
print("------------------------------------------------------------------------------")


for t in range(max_target_len):
    decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
    print("Decoder Output Shape:", decoder_output.shape)
    print("Decoder Hidden Shape:", decoder_hidden.shape)
    
    decoder_input = target_variable[t].view(1,-1)
    print("The target variable at the currrent timestep before reshaping:", target_variable[t])
    print("The target variable at the curent timestep shape before reshaping:", target_variable[t].shape)
    print("The Decoder input shape(reshape the target variable): ", decoder_input.shape)
    
    print("The mask at the current timestep:", mask[t])
    print("The mask at the current timestep shape:", mask[t].shape)
    mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
    print("Mask Loss:", mask_loss)
    print("Total:", nTotal)
    loss+=mask_loss
    print_losses.append(mask_loss.item() * nTotal)
    print(print_losses)
    n_totals+=nTotal
    print(n_totals)
    encoder_optimizer.step()
    decoder_optimizer.step()
    returned_loss = sum(print_losses) / n_totals
    print("Returned Loss:", returned_loss)
    print("\n")
    print("-------------------------------DONE ONE TIMESTEP---------------------------------")
    print("\n")'''

'small_batch_size = 5\nbatches = batch2TrainData(voc,[random.choice(pairs) for _ in range(small_batch_size)])\ninput_variable, lengths, target_variable, mask, max_target_len = batches\n\nprint(\'input variable:\', input_variable)\nprint(\'lengths\', lengths)\nprint(\'target variable\', target_variable)\nprint(\'mask\', mask)\nprint(\'max_target_len:\', max_target_len)\n\n#Define parameters\nhidden_size= 500\nencoder_n_layers = 2\ndecoder_n_layers = 2\ndropout = 0.1\nattn_model = \'dot\'\nembedding = nn.Embedding(voc.num_words, hidden_size)\n\n#Define the enocder & decoder\nencoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)\ndecoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)\nencoder = encoder.to(device)\ndecoder = decoder.to(device)\n\nencoder.train()\ndecoder.train()\n\nencoder_optimizer = optim.Adam(encoder.parameters(), lr = 0.0001)\ndecoder_optimizer = optim.Adam(decoder.parameters(), lr = 0.0001)\nencode

In [54]:
def train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding, 
          encoder_optimizer, decoder_optimizer, batch_size, clip, max_length = MAX_LENGTH):
    
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    
    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    
    loss = 0
    print_losses = []
    n_totals = 0
    
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)
    
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input = decoder_input.to(device)
    
    decoder_hidden = encoder_hidden[:decoder.n_layers]
    
    use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
    
    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
            decoder_input = target_variable[t].view(1,-1)
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss+=mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals+=nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
            _,topi = decoder_optput.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
            decoder_input = decoder_input.to(device)
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss+=mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals+=nTotal
        
    loss.backward()
    
    _ = torch.nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    _ = torch.nn.utils.clip_grad_norm_(decoder.parameters(), clip)
    
    encoder_optimizer.step()
    decoder_optimizer.step()
    
    return sum(print_losses)/ n_totals
    

In [47]:
def trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer, 
               embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size, 
               print_every, save_every, clip, corpus_name, loadFilename):

    # Load batches for each iteration
    training_batches = [batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
                      for _ in range(n_iteration)]

    # Initializations
    print('Initializing ...')
    start_iteration = 1
    print_loss = 0
    if loadFilename:
        start_iteration = checkpoint['iteration'] + 1

    # Training loop
    print("Training...")
    for iteration in range(start_iteration, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        # Extract fields from batch
        input_variable, lengths, target_variable, mask, max_target_len = training_batch

        # Run a training iteration with batch
        loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder,
                     decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip)
        print_loss += loss

        # Print progress
        if iteration % print_every == 0:
            print_loss_avg = print_loss / print_every
            print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}".format(iteration, iteration / n_iteration * 100, print_loss_avg))
            print_loss = 0

        # Save checkpoint
        if (iteration % save_every == 0):
            directory = os.path.join(save_dir, model_name, corpus_name, '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict()
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))

In [48]:
class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        # Forward input through encoder model
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Prepare encoder's final hidden layer to be first hidden input to the decoder
        decoder_hidden = encoder_hidden[:decoder.n_layers]
        # Initialize decoder input with SOS_token
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Initialize tensors to append decoded words to
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        # Iteratively decode one word token at a time
        for _ in range(max_length):
            # Forward pass through decoder
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
            # Obtain most likely word token and its softmax score
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            # Record token and score
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # Prepare current token to be next decoder input (add a dimension)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        # Return collections of word tokens and scores
        return all_tokens, all_scores

In [49]:
def evaluate(encoder, decoder, searcher, voc, sentence, max_length=MAX_LENGTH):
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Create lengths tensor
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    # Use appropriate device
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    # Decode sentence with searcher
    tokens, scores = searcher(input_batch, lengths, max_length)
    # indexes -> words
    decoded_words = [voc.index2word[token.item()] for token in tokens]
    return decoded_words


def evaluateInput(encoder, decoder, searcher, voc):
    input_sentence = ''
    while(1):
        try:
            # Get input sentence
            input_sentence = input('> ')
            # Check if it is quit case
            if input_sentence == 'q' or input_sentence == 'quit': break
            # Normalize sentence
            input_sentence = normalizeString(input_sentence)
            # Evaluate sentence
            output_words = evaluate(encoder, decoder, searcher, voc, input_sentence)
            # Format and print response sentence
            output_words[:] = [x for x in output_words if not (x == 'EOS' or x == 'PAD')]
            print('Bot:', ' '.join(output_words))

        except KeyError:
            print("Error: Encountered unknown word.")

In [50]:
# Configure models
model_name = 'cb_model'
attn_model = 'dot'
#attn_model = 'general'
#attn_model = 'concat'
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64

# Set checkpoint to load from; set to None if starting from scratch
loadFilename = None
checkpoint_iter = 4000
#loadFilename = os.path.join(save_dir, model_name, corpus_name,
#                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
#                            '{}_checkpoint.tar'.format(checkpoint_iter))


# Load model if a loadFilename is provided
if loadFilename:
    # If loading on same machine the model was trained on
    checkpoint = torch.load(loadFilename)
    # If loading a model trained on GPU to CPU
    #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']


print('Building encoder and decoder ...')
# Initialize word embeddings
embedding = nn.Embedding(voc.num_words, hidden_size)
if loadFilename:
    embedding.load_state_dict(embedding_sd)
# Initialize encoder & decoder models
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
if loadFilename:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
# Use appropriate device
encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

Building encoder and decoder ...
Models built and ready to go!


In [55]:
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 4000
print_every = 1
save_every = 500

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# Run training iterations
print("Starting Training!")
trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer,
           embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size,
           print_every, save_every, clip, corpus_name, loadFilename)

Building optimizers ...
Starting Training!
Initializing ...
Training...
Iteration: 1; Percent complete: 0.0%; Average loss: 8.9682
Iteration: 2; Percent complete: 0.1%; Average loss: 8.7466
Iteration: 3; Percent complete: 0.1%; Average loss: 8.5202
Iteration: 4; Percent complete: 0.1%; Average loss: 7.9875
Iteration: 5; Percent complete: 0.1%; Average loss: 7.6512
Iteration: 6; Percent complete: 0.1%; Average loss: 6.8905
Iteration: 7; Percent complete: 0.2%; Average loss: 6.5037
Iteration: 8; Percent complete: 0.2%; Average loss: 6.7799
Iteration: 9; Percent complete: 0.2%; Average loss: 6.4198
Iteration: 10; Percent complete: 0.2%; Average loss: 6.2929
Iteration: 11; Percent complete: 0.3%; Average loss: 5.4661
Iteration: 12; Percent complete: 0.3%; Average loss: 5.9222
Iteration: 13; Percent complete: 0.3%; Average loss: 5.1892
Iteration: 14; Percent complete: 0.4%; Average loss: 4.8910
Iteration: 15; Percent complete: 0.4%; Average loss: 4.8181
Iteration: 16; Percent complete: 0.4%

Iteration: 136; Percent complete: 3.4%; Average loss: 3.7693
Iteration: 137; Percent complete: 3.4%; Average loss: 3.7230
Iteration: 138; Percent complete: 3.5%; Average loss: 3.7412
Iteration: 139; Percent complete: 3.5%; Average loss: 3.6472
Iteration: 140; Percent complete: 3.5%; Average loss: 3.6630
Iteration: 141; Percent complete: 3.5%; Average loss: 3.4587
Iteration: 142; Percent complete: 3.5%; Average loss: 3.6421
Iteration: 143; Percent complete: 3.6%; Average loss: 3.5282
Iteration: 144; Percent complete: 3.6%; Average loss: 3.6722
Iteration: 145; Percent complete: 3.6%; Average loss: 3.7428
Iteration: 146; Percent complete: 3.6%; Average loss: 3.5540
Iteration: 147; Percent complete: 3.7%; Average loss: 3.6635
Iteration: 148; Percent complete: 3.7%; Average loss: 3.9120
Iteration: 149; Percent complete: 3.7%; Average loss: 3.5492
Iteration: 150; Percent complete: 3.8%; Average loss: 3.7582
Iteration: 151; Percent complete: 3.8%; Average loss: 3.6253
Iteration: 152; Percent 

Iteration: 271; Percent complete: 6.8%; Average loss: 3.2718
Iteration: 272; Percent complete: 6.8%; Average loss: 3.3204
Iteration: 273; Percent complete: 6.8%; Average loss: 3.3793
Iteration: 274; Percent complete: 6.9%; Average loss: 3.4504
Iteration: 275; Percent complete: 6.9%; Average loss: 3.4354
Iteration: 276; Percent complete: 6.9%; Average loss: 3.2260
Iteration: 277; Percent complete: 6.9%; Average loss: 3.3650
Iteration: 278; Percent complete: 7.0%; Average loss: 3.2623
Iteration: 279; Percent complete: 7.0%; Average loss: 3.4532
Iteration: 280; Percent complete: 7.0%; Average loss: 3.5294
Iteration: 281; Percent complete: 7.0%; Average loss: 3.4725
Iteration: 282; Percent complete: 7.0%; Average loss: 3.2585
Iteration: 283; Percent complete: 7.1%; Average loss: 3.1777
Iteration: 284; Percent complete: 7.1%; Average loss: 3.1949
Iteration: 285; Percent complete: 7.1%; Average loss: 3.3492
Iteration: 286; Percent complete: 7.1%; Average loss: 3.3218
Iteration: 287; Percent 

Iteration: 406; Percent complete: 10.2%; Average loss: 3.2737
Iteration: 407; Percent complete: 10.2%; Average loss: 3.2325
Iteration: 408; Percent complete: 10.2%; Average loss: 3.3328
Iteration: 409; Percent complete: 10.2%; Average loss: 3.0878
Iteration: 410; Percent complete: 10.2%; Average loss: 3.2570
Iteration: 411; Percent complete: 10.3%; Average loss: 3.1672
Iteration: 412; Percent complete: 10.3%; Average loss: 3.3389
Iteration: 413; Percent complete: 10.3%; Average loss: 3.2364
Iteration: 414; Percent complete: 10.3%; Average loss: 3.1642
Iteration: 415; Percent complete: 10.4%; Average loss: 3.1594
Iteration: 416; Percent complete: 10.4%; Average loss: 3.2046
Iteration: 417; Percent complete: 10.4%; Average loss: 3.1673
Iteration: 418; Percent complete: 10.4%; Average loss: 3.3764
Iteration: 419; Percent complete: 10.5%; Average loss: 3.2919
Iteration: 420; Percent complete: 10.5%; Average loss: 3.0755
Iteration: 421; Percent complete: 10.5%; Average loss: 2.9852
Iteratio

Iteration: 539; Percent complete: 13.5%; Average loss: 3.0969
Iteration: 540; Percent complete: 13.5%; Average loss: 3.1722
Iteration: 541; Percent complete: 13.5%; Average loss: 3.2220
Iteration: 542; Percent complete: 13.6%; Average loss: 3.1824
Iteration: 543; Percent complete: 13.6%; Average loss: 2.8848
Iteration: 544; Percent complete: 13.6%; Average loss: 3.1483
Iteration: 545; Percent complete: 13.6%; Average loss: 2.9908
Iteration: 546; Percent complete: 13.7%; Average loss: 2.9932
Iteration: 547; Percent complete: 13.7%; Average loss: 3.2727
Iteration: 548; Percent complete: 13.7%; Average loss: 2.8729
Iteration: 549; Percent complete: 13.7%; Average loss: 3.2337
Iteration: 550; Percent complete: 13.8%; Average loss: 2.9562
Iteration: 551; Percent complete: 13.8%; Average loss: 3.3129
Iteration: 552; Percent complete: 13.8%; Average loss: 3.0802
Iteration: 553; Percent complete: 13.8%; Average loss: 3.1233
Iteration: 554; Percent complete: 13.9%; Average loss: 3.1109
Iteratio

Iteration: 672; Percent complete: 16.8%; Average loss: 2.8675
Iteration: 673; Percent complete: 16.8%; Average loss: 2.9664
Iteration: 674; Percent complete: 16.9%; Average loss: 3.1381
Iteration: 675; Percent complete: 16.9%; Average loss: 3.1193
Iteration: 676; Percent complete: 16.9%; Average loss: 2.9081
Iteration: 677; Percent complete: 16.9%; Average loss: 3.2408
Iteration: 678; Percent complete: 17.0%; Average loss: 3.0908
Iteration: 679; Percent complete: 17.0%; Average loss: 3.0628
Iteration: 680; Percent complete: 17.0%; Average loss: 3.0761
Iteration: 681; Percent complete: 17.0%; Average loss: 3.0741
Iteration: 682; Percent complete: 17.1%; Average loss: 3.0895
Iteration: 683; Percent complete: 17.1%; Average loss: 2.8718
Iteration: 684; Percent complete: 17.1%; Average loss: 3.3302
Iteration: 685; Percent complete: 17.1%; Average loss: 3.0151
Iteration: 686; Percent complete: 17.2%; Average loss: 3.1373
Iteration: 687; Percent complete: 17.2%; Average loss: 3.0712
Iteratio

Iteration: 805; Percent complete: 20.1%; Average loss: 2.8865
Iteration: 806; Percent complete: 20.2%; Average loss: 3.1373
Iteration: 807; Percent complete: 20.2%; Average loss: 2.7135
Iteration: 808; Percent complete: 20.2%; Average loss: 2.9431
Iteration: 809; Percent complete: 20.2%; Average loss: 2.9535
Iteration: 810; Percent complete: 20.2%; Average loss: 2.8953
Iteration: 811; Percent complete: 20.3%; Average loss: 3.0020
Iteration: 812; Percent complete: 20.3%; Average loss: 3.2816
Iteration: 813; Percent complete: 20.3%; Average loss: 3.0219
Iteration: 814; Percent complete: 20.3%; Average loss: 3.1160
Iteration: 815; Percent complete: 20.4%; Average loss: 3.1284
Iteration: 816; Percent complete: 20.4%; Average loss: 2.9450
Iteration: 817; Percent complete: 20.4%; Average loss: 2.9581
Iteration: 818; Percent complete: 20.4%; Average loss: 3.0028
Iteration: 819; Percent complete: 20.5%; Average loss: 2.9057
Iteration: 820; Percent complete: 20.5%; Average loss: 3.0178
Iteratio

Iteration: 938; Percent complete: 23.4%; Average loss: 3.2455
Iteration: 939; Percent complete: 23.5%; Average loss: 2.7410
Iteration: 940; Percent complete: 23.5%; Average loss: 2.9439
Iteration: 941; Percent complete: 23.5%; Average loss: 2.8463
Iteration: 942; Percent complete: 23.5%; Average loss: 2.9876
Iteration: 943; Percent complete: 23.6%; Average loss: 2.7541
Iteration: 944; Percent complete: 23.6%; Average loss: 2.8363
Iteration: 945; Percent complete: 23.6%; Average loss: 2.9182
Iteration: 946; Percent complete: 23.6%; Average loss: 2.8883
Iteration: 947; Percent complete: 23.7%; Average loss: 2.8646
Iteration: 948; Percent complete: 23.7%; Average loss: 3.0061
Iteration: 949; Percent complete: 23.7%; Average loss: 3.0173
Iteration: 950; Percent complete: 23.8%; Average loss: 3.0896
Iteration: 951; Percent complete: 23.8%; Average loss: 2.8836
Iteration: 952; Percent complete: 23.8%; Average loss: 2.7953
Iteration: 953; Percent complete: 23.8%; Average loss: 2.9732
Iteratio

Iteration: 1070; Percent complete: 26.8%; Average loss: 2.7867
Iteration: 1071; Percent complete: 26.8%; Average loss: 3.1237
Iteration: 1072; Percent complete: 26.8%; Average loss: 2.9889
Iteration: 1073; Percent complete: 26.8%; Average loss: 2.9339
Iteration: 1074; Percent complete: 26.9%; Average loss: 2.9003
Iteration: 1075; Percent complete: 26.9%; Average loss: 3.0286
Iteration: 1076; Percent complete: 26.9%; Average loss: 2.4790
Iteration: 1077; Percent complete: 26.9%; Average loss: 2.9385
Iteration: 1078; Percent complete: 27.0%; Average loss: 2.5615
Iteration: 1079; Percent complete: 27.0%; Average loss: 2.8095
Iteration: 1080; Percent complete: 27.0%; Average loss: 3.1641
Iteration: 1081; Percent complete: 27.0%; Average loss: 3.0510
Iteration: 1082; Percent complete: 27.1%; Average loss: 2.6764
Iteration: 1083; Percent complete: 27.1%; Average loss: 3.0404
Iteration: 1084; Percent complete: 27.1%; Average loss: 2.9792
Iteration: 1085; Percent complete: 27.1%; Average loss:

Iteration: 1201; Percent complete: 30.0%; Average loss: 2.6333
Iteration: 1202; Percent complete: 30.0%; Average loss: 3.0505
Iteration: 1203; Percent complete: 30.1%; Average loss: 2.9594
Iteration: 1204; Percent complete: 30.1%; Average loss: 2.9222
Iteration: 1205; Percent complete: 30.1%; Average loss: 2.5905
Iteration: 1206; Percent complete: 30.1%; Average loss: 2.9363
Iteration: 1207; Percent complete: 30.2%; Average loss: 3.1615
Iteration: 1208; Percent complete: 30.2%; Average loss: 2.7524
Iteration: 1209; Percent complete: 30.2%; Average loss: 2.8556
Iteration: 1210; Percent complete: 30.2%; Average loss: 2.9699
Iteration: 1211; Percent complete: 30.3%; Average loss: 2.8683
Iteration: 1212; Percent complete: 30.3%; Average loss: 2.6954
Iteration: 1213; Percent complete: 30.3%; Average loss: 2.8540
Iteration: 1214; Percent complete: 30.3%; Average loss: 2.5875
Iteration: 1215; Percent complete: 30.4%; Average loss: 2.5692
Iteration: 1216; Percent complete: 30.4%; Average loss:

Iteration: 1332; Percent complete: 33.3%; Average loss: 2.8933
Iteration: 1333; Percent complete: 33.3%; Average loss: 2.8423
Iteration: 1334; Percent complete: 33.4%; Average loss: 2.8495
Iteration: 1335; Percent complete: 33.4%; Average loss: 2.8388
Iteration: 1336; Percent complete: 33.4%; Average loss: 2.7905
Iteration: 1337; Percent complete: 33.4%; Average loss: 2.7273
Iteration: 1338; Percent complete: 33.5%; Average loss: 2.7588
Iteration: 1339; Percent complete: 33.5%; Average loss: 2.8971
Iteration: 1340; Percent complete: 33.5%; Average loss: 2.8623
Iteration: 1341; Percent complete: 33.5%; Average loss: 2.9023
Iteration: 1342; Percent complete: 33.6%; Average loss: 2.8837
Iteration: 1343; Percent complete: 33.6%; Average loss: 2.9793
Iteration: 1344; Percent complete: 33.6%; Average loss: 2.7350
Iteration: 1345; Percent complete: 33.6%; Average loss: 2.8442
Iteration: 1346; Percent complete: 33.7%; Average loss: 2.8286
Iteration: 1347; Percent complete: 33.7%; Average loss:

Iteration: 1463; Percent complete: 36.6%; Average loss: 2.8548
Iteration: 1464; Percent complete: 36.6%; Average loss: 2.5950
Iteration: 1465; Percent complete: 36.6%; Average loss: 2.8620
Iteration: 1466; Percent complete: 36.6%; Average loss: 2.4825
Iteration: 1467; Percent complete: 36.7%; Average loss: 2.7991
Iteration: 1468; Percent complete: 36.7%; Average loss: 2.8660
Iteration: 1469; Percent complete: 36.7%; Average loss: 2.6505
Iteration: 1470; Percent complete: 36.8%; Average loss: 2.8801
Iteration: 1471; Percent complete: 36.8%; Average loss: 2.6040
Iteration: 1472; Percent complete: 36.8%; Average loss: 2.6878
Iteration: 1473; Percent complete: 36.8%; Average loss: 2.8223
Iteration: 1474; Percent complete: 36.9%; Average loss: 2.7796
Iteration: 1475; Percent complete: 36.9%; Average loss: 2.7332
Iteration: 1476; Percent complete: 36.9%; Average loss: 2.7354
Iteration: 1477; Percent complete: 36.9%; Average loss: 2.8472
Iteration: 1478; Percent complete: 37.0%; Average loss:

Iteration: 1594; Percent complete: 39.9%; Average loss: 2.8318
Iteration: 1595; Percent complete: 39.9%; Average loss: 2.6002
Iteration: 1596; Percent complete: 39.9%; Average loss: 2.9927
Iteration: 1597; Percent complete: 39.9%; Average loss: 2.9985
Iteration: 1598; Percent complete: 40.0%; Average loss: 2.9222
Iteration: 1599; Percent complete: 40.0%; Average loss: 2.4940
Iteration: 1600; Percent complete: 40.0%; Average loss: 2.8246
Iteration: 1601; Percent complete: 40.0%; Average loss: 2.9342
Iteration: 1602; Percent complete: 40.1%; Average loss: 2.8197
Iteration: 1603; Percent complete: 40.1%; Average loss: 2.6459
Iteration: 1604; Percent complete: 40.1%; Average loss: 2.9047
Iteration: 1605; Percent complete: 40.1%; Average loss: 2.9568
Iteration: 1606; Percent complete: 40.2%; Average loss: 2.7569
Iteration: 1607; Percent complete: 40.2%; Average loss: 2.8163
Iteration: 1608; Percent complete: 40.2%; Average loss: 2.6594
Iteration: 1609; Percent complete: 40.2%; Average loss:

Iteration: 1725; Percent complete: 43.1%; Average loss: 2.6227
Iteration: 1726; Percent complete: 43.1%; Average loss: 2.6840
Iteration: 1727; Percent complete: 43.2%; Average loss: 2.8465
Iteration: 1728; Percent complete: 43.2%; Average loss: 2.9543
Iteration: 1729; Percent complete: 43.2%; Average loss: 2.6683
Iteration: 1730; Percent complete: 43.2%; Average loss: 2.5747
Iteration: 1731; Percent complete: 43.3%; Average loss: 2.6574
Iteration: 1732; Percent complete: 43.3%; Average loss: 2.6659
Iteration: 1733; Percent complete: 43.3%; Average loss: 2.7059
Iteration: 1734; Percent complete: 43.4%; Average loss: 2.6482
Iteration: 1735; Percent complete: 43.4%; Average loss: 2.7327
Iteration: 1736; Percent complete: 43.4%; Average loss: 2.8921
Iteration: 1737; Percent complete: 43.4%; Average loss: 2.6751
Iteration: 1738; Percent complete: 43.5%; Average loss: 2.8008
Iteration: 1739; Percent complete: 43.5%; Average loss: 2.9124
Iteration: 1740; Percent complete: 43.5%; Average loss:

Iteration: 1856; Percent complete: 46.4%; Average loss: 2.6589
Iteration: 1857; Percent complete: 46.4%; Average loss: 2.6855
Iteration: 1858; Percent complete: 46.5%; Average loss: 2.7085
Iteration: 1859; Percent complete: 46.5%; Average loss: 2.8050
Iteration: 1860; Percent complete: 46.5%; Average loss: 2.5935
Iteration: 1861; Percent complete: 46.5%; Average loss: 2.8424
Iteration: 1862; Percent complete: 46.6%; Average loss: 2.3284
Iteration: 1863; Percent complete: 46.6%; Average loss: 2.6462
Iteration: 1864; Percent complete: 46.6%; Average loss: 2.6494
Iteration: 1865; Percent complete: 46.6%; Average loss: 2.8861
Iteration: 1866; Percent complete: 46.7%; Average loss: 2.6770
Iteration: 1867; Percent complete: 46.7%; Average loss: 2.7457
Iteration: 1868; Percent complete: 46.7%; Average loss: 2.7736
Iteration: 1869; Percent complete: 46.7%; Average loss: 2.7143
Iteration: 1870; Percent complete: 46.8%; Average loss: 2.6358
Iteration: 1871; Percent complete: 46.8%; Average loss:

Iteration: 1987; Percent complete: 49.7%; Average loss: 2.6850
Iteration: 1988; Percent complete: 49.7%; Average loss: 2.6242
Iteration: 1989; Percent complete: 49.7%; Average loss: 2.5658
Iteration: 1990; Percent complete: 49.8%; Average loss: 2.6230
Iteration: 1991; Percent complete: 49.8%; Average loss: 2.6374
Iteration: 1992; Percent complete: 49.8%; Average loss: 2.7831
Iteration: 1993; Percent complete: 49.8%; Average loss: 2.6113
Iteration: 1994; Percent complete: 49.9%; Average loss: 2.9125
Iteration: 1995; Percent complete: 49.9%; Average loss: 2.8054
Iteration: 1996; Percent complete: 49.9%; Average loss: 2.6805
Iteration: 1997; Percent complete: 49.9%; Average loss: 2.7406
Iteration: 1998; Percent complete: 50.0%; Average loss: 2.6885
Iteration: 1999; Percent complete: 50.0%; Average loss: 2.7061
Iteration: 2000; Percent complete: 50.0%; Average loss: 2.6558
Iteration: 2001; Percent complete: 50.0%; Average loss: 2.5165
Iteration: 2002; Percent complete: 50.0%; Average loss:

Iteration: 2118; Percent complete: 52.9%; Average loss: 2.5768
Iteration: 2119; Percent complete: 53.0%; Average loss: 2.6955
Iteration: 2120; Percent complete: 53.0%; Average loss: 2.7813
Iteration: 2121; Percent complete: 53.0%; Average loss: 2.5019
Iteration: 2122; Percent complete: 53.0%; Average loss: 2.8460
Iteration: 2123; Percent complete: 53.1%; Average loss: 2.3662
Iteration: 2124; Percent complete: 53.1%; Average loss: 2.6954
Iteration: 2125; Percent complete: 53.1%; Average loss: 2.6144
Iteration: 2126; Percent complete: 53.1%; Average loss: 2.6765
Iteration: 2127; Percent complete: 53.2%; Average loss: 2.8669
Iteration: 2128; Percent complete: 53.2%; Average loss: 2.4526
Iteration: 2129; Percent complete: 53.2%; Average loss: 2.2271
Iteration: 2130; Percent complete: 53.2%; Average loss: 2.6612
Iteration: 2131; Percent complete: 53.3%; Average loss: 2.4634
Iteration: 2132; Percent complete: 53.3%; Average loss: 2.4363
Iteration: 2133; Percent complete: 53.3%; Average loss:

Iteration: 2249; Percent complete: 56.2%; Average loss: 2.7206
Iteration: 2250; Percent complete: 56.2%; Average loss: 2.7334
Iteration: 2251; Percent complete: 56.3%; Average loss: 2.5065
Iteration: 2252; Percent complete: 56.3%; Average loss: 2.7075
Iteration: 2253; Percent complete: 56.3%; Average loss: 2.5620
Iteration: 2254; Percent complete: 56.4%; Average loss: 2.7513
Iteration: 2255; Percent complete: 56.4%; Average loss: 2.7542
Iteration: 2256; Percent complete: 56.4%; Average loss: 2.5059
Iteration: 2257; Percent complete: 56.4%; Average loss: 2.4948
Iteration: 2258; Percent complete: 56.5%; Average loss: 2.6864
Iteration: 2259; Percent complete: 56.5%; Average loss: 2.3271
Iteration: 2260; Percent complete: 56.5%; Average loss: 2.6772
Iteration: 2261; Percent complete: 56.5%; Average loss: 2.3717
Iteration: 2262; Percent complete: 56.5%; Average loss: 2.3618
Iteration: 2263; Percent complete: 56.6%; Average loss: 2.2986
Iteration: 2264; Percent complete: 56.6%; Average loss:

Iteration: 2380; Percent complete: 59.5%; Average loss: 2.5706
Iteration: 2381; Percent complete: 59.5%; Average loss: 2.5432
Iteration: 2382; Percent complete: 59.6%; Average loss: 2.4231
Iteration: 2383; Percent complete: 59.6%; Average loss: 2.5454
Iteration: 2384; Percent complete: 59.6%; Average loss: 2.3457
Iteration: 2385; Percent complete: 59.6%; Average loss: 2.6919
Iteration: 2386; Percent complete: 59.7%; Average loss: 2.6387
Iteration: 2387; Percent complete: 59.7%; Average loss: 2.5835
Iteration: 2388; Percent complete: 59.7%; Average loss: 2.4326
Iteration: 2389; Percent complete: 59.7%; Average loss: 2.6067
Iteration: 2390; Percent complete: 59.8%; Average loss: 2.5138
Iteration: 2391; Percent complete: 59.8%; Average loss: 2.3743
Iteration: 2392; Percent complete: 59.8%; Average loss: 2.5279
Iteration: 2393; Percent complete: 59.8%; Average loss: 2.5258
Iteration: 2394; Percent complete: 59.9%; Average loss: 2.4563
Iteration: 2395; Percent complete: 59.9%; Average loss:

Iteration: 2511; Percent complete: 62.8%; Average loss: 2.4141
Iteration: 2512; Percent complete: 62.8%; Average loss: 2.3451
Iteration: 2513; Percent complete: 62.8%; Average loss: 2.2807
Iteration: 2514; Percent complete: 62.8%; Average loss: 2.6918
Iteration: 2515; Percent complete: 62.9%; Average loss: 2.5722
Iteration: 2516; Percent complete: 62.9%; Average loss: 2.5630
Iteration: 2517; Percent complete: 62.9%; Average loss: 2.4267
Iteration: 2518; Percent complete: 62.9%; Average loss: 2.5218
Iteration: 2519; Percent complete: 63.0%; Average loss: 2.4731
Iteration: 2520; Percent complete: 63.0%; Average loss: 2.4334
Iteration: 2521; Percent complete: 63.0%; Average loss: 2.4544
Iteration: 2522; Percent complete: 63.0%; Average loss: 2.6371
Iteration: 2523; Percent complete: 63.1%; Average loss: 2.5226
Iteration: 2524; Percent complete: 63.1%; Average loss: 2.5874
Iteration: 2525; Percent complete: 63.1%; Average loss: 2.5253
Iteration: 2526; Percent complete: 63.1%; Average loss:

Iteration: 2642; Percent complete: 66.0%; Average loss: 2.6590
Iteration: 2643; Percent complete: 66.1%; Average loss: 2.5486
Iteration: 2644; Percent complete: 66.1%; Average loss: 2.5824
Iteration: 2645; Percent complete: 66.1%; Average loss: 2.2933
Iteration: 2646; Percent complete: 66.1%; Average loss: 2.4955
Iteration: 2647; Percent complete: 66.2%; Average loss: 2.4738
Iteration: 2648; Percent complete: 66.2%; Average loss: 2.7643
Iteration: 2649; Percent complete: 66.2%; Average loss: 2.4763
Iteration: 2650; Percent complete: 66.2%; Average loss: 2.4883
Iteration: 2651; Percent complete: 66.3%; Average loss: 2.6790
Iteration: 2652; Percent complete: 66.3%; Average loss: 2.4882
Iteration: 2653; Percent complete: 66.3%; Average loss: 2.3906
Iteration: 2654; Percent complete: 66.3%; Average loss: 2.4475
Iteration: 2655; Percent complete: 66.4%; Average loss: 2.6782
Iteration: 2656; Percent complete: 66.4%; Average loss: 2.5956
Iteration: 2657; Percent complete: 66.4%; Average loss:

Iteration: 2773; Percent complete: 69.3%; Average loss: 2.4752
Iteration: 2774; Percent complete: 69.3%; Average loss: 2.4654
Iteration: 2775; Percent complete: 69.4%; Average loss: 2.6533
Iteration: 2776; Percent complete: 69.4%; Average loss: 2.6563
Iteration: 2777; Percent complete: 69.4%; Average loss: 2.6636
Iteration: 2778; Percent complete: 69.5%; Average loss: 2.3620
Iteration: 2779; Percent complete: 69.5%; Average loss: 2.4702
Iteration: 2780; Percent complete: 69.5%; Average loss: 2.4737
Iteration: 2781; Percent complete: 69.5%; Average loss: 2.4770
Iteration: 2782; Percent complete: 69.5%; Average loss: 2.4694
Iteration: 2783; Percent complete: 69.6%; Average loss: 2.3946
Iteration: 2784; Percent complete: 69.6%; Average loss: 2.5348
Iteration: 2785; Percent complete: 69.6%; Average loss: 2.4834
Iteration: 2786; Percent complete: 69.7%; Average loss: 2.4384
Iteration: 2787; Percent complete: 69.7%; Average loss: 2.5215
Iteration: 2788; Percent complete: 69.7%; Average loss:

Iteration: 2904; Percent complete: 72.6%; Average loss: 2.3350
Iteration: 2905; Percent complete: 72.6%; Average loss: 2.5220
Iteration: 2906; Percent complete: 72.7%; Average loss: 2.2477
Iteration: 2907; Percent complete: 72.7%; Average loss: 2.5330
Iteration: 2908; Percent complete: 72.7%; Average loss: 2.8788
Iteration: 2909; Percent complete: 72.7%; Average loss: 2.5529
Iteration: 2910; Percent complete: 72.8%; Average loss: 2.3615
Iteration: 2911; Percent complete: 72.8%; Average loss: 2.3160
Iteration: 2912; Percent complete: 72.8%; Average loss: 2.4622
Iteration: 2913; Percent complete: 72.8%; Average loss: 2.4126
Iteration: 2914; Percent complete: 72.9%; Average loss: 2.4375
Iteration: 2915; Percent complete: 72.9%; Average loss: 2.4524
Iteration: 2916; Percent complete: 72.9%; Average loss: 2.3845
Iteration: 2917; Percent complete: 72.9%; Average loss: 2.4070
Iteration: 2918; Percent complete: 73.0%; Average loss: 2.3127
Iteration: 2919; Percent complete: 73.0%; Average loss:

Iteration: 3035; Percent complete: 75.9%; Average loss: 2.4095
Iteration: 3036; Percent complete: 75.9%; Average loss: 2.4238
Iteration: 3037; Percent complete: 75.9%; Average loss: 2.5162
Iteration: 3038; Percent complete: 75.9%; Average loss: 2.5724
Iteration: 3039; Percent complete: 76.0%; Average loss: 2.3386
Iteration: 3040; Percent complete: 76.0%; Average loss: 2.4108
Iteration: 3041; Percent complete: 76.0%; Average loss: 2.3205
Iteration: 3042; Percent complete: 76.0%; Average loss: 2.3724
Iteration: 3043; Percent complete: 76.1%; Average loss: 2.4239
Iteration: 3044; Percent complete: 76.1%; Average loss: 2.2469
Iteration: 3045; Percent complete: 76.1%; Average loss: 2.2079
Iteration: 3046; Percent complete: 76.1%; Average loss: 2.2799
Iteration: 3047; Percent complete: 76.2%; Average loss: 2.1884
Iteration: 3048; Percent complete: 76.2%; Average loss: 2.3691
Iteration: 3049; Percent complete: 76.2%; Average loss: 2.2783
Iteration: 3050; Percent complete: 76.2%; Average loss:

Iteration: 3166; Percent complete: 79.1%; Average loss: 2.3682
Iteration: 3167; Percent complete: 79.2%; Average loss: 2.3336
Iteration: 3168; Percent complete: 79.2%; Average loss: 2.2939
Iteration: 3169; Percent complete: 79.2%; Average loss: 2.3207
Iteration: 3170; Percent complete: 79.2%; Average loss: 2.2215
Iteration: 3171; Percent complete: 79.3%; Average loss: 2.5763
Iteration: 3172; Percent complete: 79.3%; Average loss: 2.2391
Iteration: 3173; Percent complete: 79.3%; Average loss: 2.1355
Iteration: 3174; Percent complete: 79.3%; Average loss: 2.4216
Iteration: 3175; Percent complete: 79.4%; Average loss: 2.3437
Iteration: 3176; Percent complete: 79.4%; Average loss: 2.5292
Iteration: 3177; Percent complete: 79.4%; Average loss: 2.3919
Iteration: 3178; Percent complete: 79.5%; Average loss: 2.2270
Iteration: 3179; Percent complete: 79.5%; Average loss: 2.2934
Iteration: 3180; Percent complete: 79.5%; Average loss: 2.3078
Iteration: 3181; Percent complete: 79.5%; Average loss:

Iteration: 3297; Percent complete: 82.4%; Average loss: 2.2928
Iteration: 3298; Percent complete: 82.5%; Average loss: 2.1248
Iteration: 3299; Percent complete: 82.5%; Average loss: 2.2009
Iteration: 3300; Percent complete: 82.5%; Average loss: 2.4196
Iteration: 3301; Percent complete: 82.5%; Average loss: 2.2128
Iteration: 3302; Percent complete: 82.5%; Average loss: 2.4077
Iteration: 3303; Percent complete: 82.6%; Average loss: 2.3517
Iteration: 3304; Percent complete: 82.6%; Average loss: 2.5449
Iteration: 3305; Percent complete: 82.6%; Average loss: 2.2684
Iteration: 3306; Percent complete: 82.7%; Average loss: 2.3427
Iteration: 3307; Percent complete: 82.7%; Average loss: 2.4162
Iteration: 3308; Percent complete: 82.7%; Average loss: 2.2571
Iteration: 3309; Percent complete: 82.7%; Average loss: 2.2274
Iteration: 3310; Percent complete: 82.8%; Average loss: 2.4206
Iteration: 3311; Percent complete: 82.8%; Average loss: 2.3420
Iteration: 3312; Percent complete: 82.8%; Average loss:

Iteration: 3428; Percent complete: 85.7%; Average loss: 2.2414
Iteration: 3429; Percent complete: 85.7%; Average loss: 2.1524
Iteration: 3430; Percent complete: 85.8%; Average loss: 2.3213
Iteration: 3431; Percent complete: 85.8%; Average loss: 2.1352
Iteration: 3432; Percent complete: 85.8%; Average loss: 2.3867
Iteration: 3433; Percent complete: 85.8%; Average loss: 2.4225
Iteration: 3434; Percent complete: 85.9%; Average loss: 2.4380
Iteration: 3435; Percent complete: 85.9%; Average loss: 2.3294
Iteration: 3436; Percent complete: 85.9%; Average loss: 2.3022
Iteration: 3437; Percent complete: 85.9%; Average loss: 2.3618
Iteration: 3438; Percent complete: 86.0%; Average loss: 2.3122
Iteration: 3439; Percent complete: 86.0%; Average loss: 2.3045
Iteration: 3440; Percent complete: 86.0%; Average loss: 2.1792
Iteration: 3441; Percent complete: 86.0%; Average loss: 2.1593
Iteration: 3442; Percent complete: 86.1%; Average loss: 2.1611
Iteration: 3443; Percent complete: 86.1%; Average loss:

Iteration: 3559; Percent complete: 89.0%; Average loss: 2.1991
Iteration: 3560; Percent complete: 89.0%; Average loss: 2.3868
Iteration: 3561; Percent complete: 89.0%; Average loss: 2.2613
Iteration: 3562; Percent complete: 89.0%; Average loss: 2.1725
Iteration: 3563; Percent complete: 89.1%; Average loss: 2.1583
Iteration: 3564; Percent complete: 89.1%; Average loss: 2.1248
Iteration: 3565; Percent complete: 89.1%; Average loss: 2.3328
Iteration: 3566; Percent complete: 89.1%; Average loss: 2.2688
Iteration: 3567; Percent complete: 89.2%; Average loss: 2.3447
Iteration: 3568; Percent complete: 89.2%; Average loss: 2.1629
Iteration: 3569; Percent complete: 89.2%; Average loss: 2.3799
Iteration: 3570; Percent complete: 89.2%; Average loss: 2.1816
Iteration: 3571; Percent complete: 89.3%; Average loss: 2.3387
Iteration: 3572; Percent complete: 89.3%; Average loss: 2.2473
Iteration: 3573; Percent complete: 89.3%; Average loss: 2.1285
Iteration: 3574; Percent complete: 89.3%; Average loss:

Iteration: 3690; Percent complete: 92.2%; Average loss: 2.3145
Iteration: 3691; Percent complete: 92.3%; Average loss: 2.1711
Iteration: 3692; Percent complete: 92.3%; Average loss: 1.9841
Iteration: 3693; Percent complete: 92.3%; Average loss: 2.1939
Iteration: 3694; Percent complete: 92.3%; Average loss: 2.3371
Iteration: 3695; Percent complete: 92.4%; Average loss: 2.5802
Iteration: 3696; Percent complete: 92.4%; Average loss: 2.2222
Iteration: 3697; Percent complete: 92.4%; Average loss: 2.3568
Iteration: 3698; Percent complete: 92.5%; Average loss: 2.2976
Iteration: 3699; Percent complete: 92.5%; Average loss: 2.0232
Iteration: 3700; Percent complete: 92.5%; Average loss: 2.2164
Iteration: 3701; Percent complete: 92.5%; Average loss: 2.2540
Iteration: 3702; Percent complete: 92.5%; Average loss: 2.2515
Iteration: 3703; Percent complete: 92.6%; Average loss: 2.0955
Iteration: 3704; Percent complete: 92.6%; Average loss: 2.2004
Iteration: 3705; Percent complete: 92.6%; Average loss:

Iteration: 3821; Percent complete: 95.5%; Average loss: 2.2645
Iteration: 3822; Percent complete: 95.5%; Average loss: 2.0010
Iteration: 3823; Percent complete: 95.6%; Average loss: 2.2654
Iteration: 3824; Percent complete: 95.6%; Average loss: 2.1211
Iteration: 3825; Percent complete: 95.6%; Average loss: 2.1202
Iteration: 3826; Percent complete: 95.7%; Average loss: 2.2830
Iteration: 3827; Percent complete: 95.7%; Average loss: 2.0870
Iteration: 3828; Percent complete: 95.7%; Average loss: 2.1725
Iteration: 3829; Percent complete: 95.7%; Average loss: 2.1127
Iteration: 3830; Percent complete: 95.8%; Average loss: 1.8756
Iteration: 3831; Percent complete: 95.8%; Average loss: 2.0835
Iteration: 3832; Percent complete: 95.8%; Average loss: 2.2059
Iteration: 3833; Percent complete: 95.8%; Average loss: 2.1514
Iteration: 3834; Percent complete: 95.9%; Average loss: 2.2645
Iteration: 3835; Percent complete: 95.9%; Average loss: 2.1595
Iteration: 3836; Percent complete: 95.9%; Average loss:

Iteration: 3952; Percent complete: 98.8%; Average loss: 2.2334
Iteration: 3953; Percent complete: 98.8%; Average loss: 2.0570
Iteration: 3954; Percent complete: 98.9%; Average loss: 2.0885
Iteration: 3955; Percent complete: 98.9%; Average loss: 2.2621
Iteration: 3956; Percent complete: 98.9%; Average loss: 2.0722
Iteration: 3957; Percent complete: 98.9%; Average loss: 2.2298
Iteration: 3958; Percent complete: 99.0%; Average loss: 2.1677
Iteration: 3959; Percent complete: 99.0%; Average loss: 2.0923
Iteration: 3960; Percent complete: 99.0%; Average loss: 2.0396
Iteration: 3961; Percent complete: 99.0%; Average loss: 2.1201
Iteration: 3962; Percent complete: 99.1%; Average loss: 2.2592
Iteration: 3963; Percent complete: 99.1%; Average loss: 2.4131
Iteration: 3964; Percent complete: 99.1%; Average loss: 2.2232
Iteration: 3965; Percent complete: 99.1%; Average loss: 2.1667
Iteration: 3966; Percent complete: 99.2%; Average loss: 2.0066
Iteration: 3967; Percent complete: 99.2%; Average loss:

In [56]:
# Set dropout layers to eval mode
encoder.eval()
decoder.eval()

# Initialize search module
searcher = GreedySearchDecoder(encoder, decoder)

evaluateInput(encoder, decoder, searcher, voc)

> Hello babe
Bot: hello .
> What's your name?
Bot: i m sorry .
> Why are you sorry?
Bot: because i m here .
> Here for what?
Bot: for what ?
> what for?
Bot: i don t know .
> what do you know?
Bot: i know .
> know what?
Bot: what ?
> what?
Bot: i m not hungry .
> I'll make you an offer you cannot refuse
Bot: i love you .
> Why do you love me?
Bot: because i love you .
> What is philosophy?
Bot: i don t know .
> Courage is what I seek
Bot: you re a good man .
> What about you?
Bot: i don t know .
> Are you courageous?
Error: Encountered unknown word.
> Are you brave?
Bot: yes .
> Are you happy?
Bot: i m fine .
> Are you hungry?
Bot: yes .
> What would you like to eat?
Bot: i don t know .
> How about Chinese food?
Bot: i m not .
> What kind of food would you like to eat?
Bot: i don t know .
> Which nationality are you?
Error: Encountered unknown word.
> Which country are you from?
Bot: yeah .
> The football fell to the ground
Bot: i m not asking you .
> Where is the football?
Bot: what ?