In [10]:
import pickle
from sklearn.model_selection import train_test_split
from torchtext.vocab import build_vocab_from_iterator
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
import random

# Load the preprocessed dialogue pairs
with open('../data/preprocessed_dialogue_pairs.pkl', 'rb') as file:
    preprocessed_dialogue_pairs = pickle.load(file)

In [13]:
for pair in preprocessed_dialogue_pairs[:10]:
    print(pair)

(['can', 'we', 'make', 'this', 'quick', 'roxanne', 'korrine', 'and', 'andrew', 'barrett', 'are', 'having', 'an', 'incredibly', 'horrendous', 'public', 'break', 'up', 'on', 'the', 'quad', 'again'], ['well', 'i', 'thought', 'we', 'would', 'start', 'with', 'pronunciation', 'if', 'that', 'is', 'okay', 'with', 'you'])
(['well', 'i', 'thought', 'we', 'would', 'start', 'with', 'pronunciation', 'if', 'that', 'is', 'okay', 'with', 'you'], ['not', 'the', 'hacking', 'and', 'gagging', 'and', 'spitting', 'part', 'please'])
(['not', 'the', 'hacking', 'and', 'gagging', 'and', 'spitting', 'part', 'please'], ['okay', 'then', 'how', 'bout', 'we', 'try', 'out', 'some', 'french', 'cuisine', 'saturday', 'night'])
(['you', 'are', 'asking', 'me', 'out', 'that', 'is', 'so', 'cute', 'what', 'is', 'your', 'name', 'again'], ['forget', 'it'])
(['the', 'thing', 'is', 'cameron', 'i', 'am', 'at', 'the', 'mercy', 'of', 'a', 'particularly', 'hideous', 'breed', 'of', 'loser', 'my', 'sister', 'i', 'can', 'not', 'date', 

In [14]:
train_pairs, val_pairs = train_test_split(preprocessed_dialogue_pairs, test_size=0.2, random_state=42)

input_sequences = [pair[0] for pair in train_pairs]
target_sequences = [pair[1] for pair in train_pairs]

# Create a generator function to yield tokens
def yield_tokens(tokenized_sequences):
    for sequence in tokenized_sequences:
        yield sequence

# Create vocabulary mappings for the input and target sequences
special_tokens = ['<pad>', '<sos>', '<eos>', '<unk>']
input_vocab = build_vocab_from_iterator(yield_tokens(input_sequences), specials=special_tokens)
target_vocab = build_vocab_from_iterator(yield_tokens(target_sequences), specials=special_tokens)

# Set the default index for handling unknown tokens
input_vocab.set_default_index(input_vocab['<unk>'])
target_vocab.set_default_index(target_vocab['<unk>'])

In [23]:
for token, index in enumerate(input_vocab.get_itos()[:10]):
    print(f"Token: {token}, Index: {index}")
for token, index in enumerate(target_vocab.get_itos()[:10]):
    print(f"Token: {token}, Index: {index}")

Token: 0, Index: <pad>
Token: 1, Index: <sos>
Token: 2, Index: <eos>
Token: 3, Index: <unk>
Token: 4, Index: you
Token: 5, Index: i
Token: 6, Index: the
Token: 7, Index: to
Token: 8, Index: is
Token: 9, Index: not
Token: 0, Index: <pad>
Token: 1, Index: <sos>
Token: 2, Index: <eos>
Token: 3, Index: <unk>
Token: 4, Index: i
Token: 5, Index: you
Token: 6, Index: the
Token: 7, Index: to
Token: 8, Index: is
Token: 9, Index: not


In [24]:
from collections import Counter

first_list_lengths = [len(pair[0]) for pair in train_pairs]
second_list_lengths = [len(pair[1]) for pair in train_pairs]

# Count occurrences of each length
first_list_length_counts = Counter(first_list_lengths)
second_list_length_counts = Counter(second_list_lengths)

# Calculate percentages
total_first_list = len(first_list_lengths)
total_second_list = len(second_list_lengths)

first_list_length_percentages = {length: count / total_first_list * 100 for length, count in first_list_length_counts.items()}
second_list_length_percentages = {length: count / total_second_list * 100 for length, count in second_list_length_counts.items()}

# Order by length
ordered_first_list_percentages = sorted(first_list_length_percentages.items(), key=lambda x: x[0])
ordered_second_list_percentages = sorted(second_list_length_percentages.items(), key=lambda x: x[0])

# Calculate cumulative percentages
cumulative_first_list_percentages = []
cumulative_percentage = 0
for length, percentage in ordered_first_list_percentages:
    cumulative_percentage += percentage
    cumulative_first_list_percentages.append((length, cumulative_percentage))

cumulative_second_list_percentages = []
cumulative_percentage = 0
for length, percentage in ordered_second_list_percentages:
    cumulative_percentage += percentage
    cumulative_second_list_percentages.append((length, cumulative_percentage))

# Print the ordered and cumulative percentages
print("Ordered First List Length Percentages:", [(length, round(percentage, 4)) for length, percentage in ordered_first_list_percentages])
print("Cumulative First List Length Percentages:", [(length, round(percentage, 4)) for length, percentage in cumulative_first_list_percentages])

print("Ordered Second List Length Percentages:", [(length, round(percentage, 4)) for length, percentage in ordered_second_list_percentages])
print("Cumulative Second List Length Percentages:", [(length, round(percentage, 4)) for length, percentage in cumulative_second_list_percentages])


Ordered First List Length Percentages: [(2, 6.7464), (3, 7.816), (4, 9.6323), (5, 8.9494), (6, 7.981), (7, 6.9068), (8, 5.8006), (9, 4.8594), (10, 4.1938), (11, 3.714), (12, 3.2554), (13, 2.9479), (14, 2.5911), (15, 2.3475), (16, 1.9788), (17, 1.8916), (18, 1.6154), (19, 1.437), (20, 1.3585), (21, 1.2067), (22, 1.1202), (23, 0.9445), (24, 0.8872), (25, 0.7528), (26, 0.7368), (27, 0.6603), (28, 0.6203), (29, 0.5618), (30, 0.4726), (31, 0.4406), (32, 0.4226), (33, 0.3634), (34, 0.3534), (35, 0.3208), (36, 0.3108), (37, 0.2576), (38, 0.2409), (39, 0.2283), (40, 0.2077), (41, 0.1963), (42, 0.1924), (43, 0.1671), (44, 0.1564), (45, 0.1524), (46, 0.1251), (47, 0.1171), (48, 0.1318), (49, 0.1025), (50, 0.1052), (51, 0.0872), (52, 0.0972), (53, 0.0706), (54, 0.0626), (55, 0.0699), (56, 0.0626), (57, 0.0599), (58, 0.0526), (59, 0.0599), (60, 0.0606), (61, 0.0459), (62, 0.0433), (63, 0.0373), (64, 0.0373), (65, 0.0346), (66, 0.0366), (67, 0.0386), (68, 0.0293), (69, 0.0306), (70, 0.02), (71, 0.0

In [25]:
# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hidden_dim, num_layers, dropout_p = 0.1):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.gru = nn.GRU(emb_dim, hidden_dim, num_layers, batch_first=True)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input_seq):
        embedded = self.dropout(self.embedding(input_seq))
        output, hidden = self.gru(embedded)
        return output, hidden


class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hidden_dim, num_layers):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.gru = nn.GRU(emb_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, encoder_outputs, hidden, decoder_input, max_length, target_tensor = None, teaching_force_ratio = 0.5):
        batch_size = encoder_outputs.size(0)
        decoder_outputs = []
        decoder_hidden = hidden 

        for i in range(max_length):
            decoder_output, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)
            #decoder_output = decoder_output.detach()
            decoder_outputs.append(decoder_output)
            if target_tensor is not None and random.random() < teaching_force_ratio:
                # Teacher forcing
                decoder_input = target_tensor[:, i].unsqueeze(1).detach()
            else:
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()
        decoder_outputs = torch.cat(decoder_outputs, dim=1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        return decoder_outputs, decoder_hidden, None

    def forward_step(self, input, hidden):
        output = self.embedding(input)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.fc(output)
        return output, hidden

cuda


In [26]:
input_dim = len(input_vocab)
output_dim = len(target_vocab)
emb_dim = 128
hidden_dim = 128
num_layers = 1
MAX_LEN = 100
# Training loop
num_epochs = 1
batch_size = 32  # Adjust the batch size as per your requirements
print(input_dim)
print(output_dim)

46276
46616


In [130]:
# import torch

# # Clear GPU memory cache
# torch.cuda.empty_cache()

# # Iterate over all available GPUs
# for i in range(torch.cuda.device_count()):
#     current_device = torch.device(f'cuda:{i}')
#     print(current_device)
#     # Move to the current GPU
#     with torch.cuda.device(current_device):
#         # Iterate over model parameters and buffers
#         for obj in list(torch.nn.Module().parameters()) + list(torch.nn.Module().buffers()):
#             print(obj)
#             if obj is not None and obj.is_cuda:
#                 obj.data = None  # This will release the memory associated with the tensor

# # Optionally, clear the GPU memory cache again
# torch.cuda.empty_cache()


cuda:0


In [27]:
import gc 
gc.collect()

1573

In [28]:
# Instantiate the encoder and decoder and move them to the appropriate device
encoder = Encoder(input_dim, emb_dim, hidden_dim, num_layers).to(device)
decoder = Decoder(output_dim, emb_dim, hidden_dim, num_layers).to(device)

# Define the optimizer and move the parameters to the appropriate device
encoder_optimizer = optim.Adam(encoder.parameters(), lr=0.001)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=0.001)

# criterion = nn.CrossEntropyLoss()
criterion = nn.NLLLoss(ignore_index=target_vocab['<pad>'])

# Create batches of train pairs
train_batches = [train_pairs[i:i+batch_size] for i in range(0, len(train_pairs), batch_size)]
val_batches = [val_pairs[i:i+batch_size] for i in range(0, len(val_pairs), batch_size)]
print(len(train_pairs))
print(len(train_batches))
print(len(val_pairs))
print(len(val_batches))

150244
4696
37561
1174


In [29]:
for epoch in range(num_epochs):
    encoder.train()
    decoder.train()
    total_loss = 0
    counter = 0
    start_time = time.time()

    for batch in train_batches[:]:
        counter += batch_size
        if counter % 10000 < batch_size:
            end_time = time.time()
            time_diff = end_time - start_time
            average_loss = total_loss / counter
            print(f"Time: {time_diff:.2f}s, Training Visited {counter // 10000 * 10000} lines, Loss: {average_loss:.4f}")

        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        input_seqs = [pair[0] + ['<eos>'] for pair in batch]
        target_seqs = [['<sos>'] + pair[1] + ['<eos>'] for pair in batch]

        input_max_len = max(len(seq) for seq in input_seqs)
        target_max_len = max(len(seq) for seq in target_seqs)

        input_indices = [[input_vocab[token] for token in seq] + [input_vocab['<pad>']] * (input_max_len - len(seq)) for seq in input_seqs]
        target_indices = [[target_vocab[token] for token in seq] + [target_vocab['<pad>']] * (target_max_len - len(seq)) for seq in target_seqs]

        input_seq = torch.tensor(input_indices).to(device)
        target_seq = torch.tensor(target_indices).to(device)
        encoder_outputs, encoder_hidden = encoder(input_seq)
        decoder_input = target_seq[:, 0].unsqueeze(1).to(device)
        decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, decoder_input, min(MAX_LEN, target_max_len), target_tensor = target_seq, teaching_force_ratio = 0.5)
        loss = criterion(
            decoder_outputs.view(-1, decoder_outputs.size(-1)),
            target_seq[:, :MAX_LEN].reshape(-1)
        )

        total_loss += loss.item()
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()

    average_loss = total_loss / len(train_pairs)
    end_time = time.time()
    time_diff = end_time - start_time
    print(f"Time: {time_diff:.2f}s, Epoch: {epoch+1}, Training Loss: {average_loss:.4f}")

    encoder.eval()
    decoder.eval()
    with torch.no_grad():
        total_loss = 0
        counter = 0
        for batch in val_batches[:]:
            counter += batch_size
            if counter % 10000 < batch_size:
                end_time = time.time()
                time_diff = end_time - start_time
                average_loss = total_loss / counter
                print(f"Time: {time_diff:.2f}s, Validation Visited {counter // 10000 * 10000} lines, Loss: {average_loss:.4f}")

            input_seqs = [pair[0] + ['<eos>'] for pair in batch]
            target_seqs = [['<sos>'] + pair[1] + ['<eos>'] for pair in batch]

            input_max_len = max(len(seq) for seq in input_seqs)
            target_max_len = max(len(seq) for seq in target_seqs)

            input_indices = [[input_vocab[token] for token in seq] + [input_vocab['<pad>']] * (input_max_len - len(seq)) for seq in input_seqs]
            target_indices = [[target_vocab[token] for token in seq] + [target_vocab['<pad>']] * (target_max_len - len(seq)) for seq in target_seqs]

            input_seq = torch.tensor(input_indices).to(device)
            target_seq = torch.tensor(target_indices).to(device)
            encoder_outputs, encoder_hidden = encoder(input_seq)
            decoder_input = torch.tensor([target_vocab["<sos>"]] * input_seq.shape[0]).unsqueeze(1).to(device)
            decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, decoder_input, min(MAX_LEN, target_max_len), target_tensor = None)
            loss = criterion(
                decoder_outputs.view(-1, decoder_outputs.size(-1)),
                target_seq[:, :MAX_LEN].reshape(-1)
            )

            total_loss += loss.item()

        average_loss = total_loss / len(val_pairs)
        end_time = time.time()
        time_diff = end_time - start_time
        print(f"Time: {time_diff:.2f}s, Epoch: {epoch+1}, Validation Loss: {average_loss:.4f}")

Time: 30.13s, Training Visited 10000 lines, Loss: 0.1897
Time: 58.32s, Training Visited 20000 lines, Loss: 0.1831
Time: 85.87s, Training Visited 30000 lines, Loss: 0.1793
Time: 113.02s, Training Visited 40000 lines, Loss: 0.1770
Time: 139.15s, Training Visited 50000 lines, Loss: 0.1753
Time: 165.79s, Training Visited 60000 lines, Loss: 0.1740
Time: 193.54s, Training Visited 70000 lines, Loss: 0.1730
Time: 222.22s, Training Visited 80000 lines, Loss: 0.1721
Time: 249.79s, Training Visited 90000 lines, Loss: 0.1714
Time: 275.33s, Training Visited 100000 lines, Loss: 0.1708
Time: 302.11s, Training Visited 110000 lines, Loss: 0.1702
Time: 329.26s, Training Visited 120000 lines, Loss: 0.1697
Time: 357.27s, Training Visited 130000 lines, Loss: 0.1693
Time: 384.03s, Training Visited 140000 lines, Loss: 0.1689
Time: 411.45s, Training Visited 150000 lines, Loss: 0.1686
Time: 412.16s, Epoch: 1, Training Loss: 0.1686
Time: 426.35s, Validation Visited 10000 lines, Loss: 0.1774
Time: 440.42s, Valid

In [None]:
# for epoch in range(num_epochs):
#     encoder.train()
#     decoder.train()
#     total_loss = 0
#     total_mask = 0
#     counter = 0
#     start_time = time.time()

#     for batch in train_batches[:]:
#         counter += batch_size
#         if counter % 10000 < batch_size:
#             end_time = time.time()
#             time_diff = end_time - start_time
#             average_loss = total_loss / counter
#             print(f"Time: {time_diff:.2f}s, Training Visited {counter // 10000 * 10000} lines, Loss: {average_loss:.4f}")

#         encoder_optimizer.zero_grad()
#         decoder_optimizer.zero_grad()

#         # input_seqs = [pair[0] for pair in batch]
#         # target_seqs = [pair[1] for pair in batch]
#         # Add the <eos> token to the end of each input sequence
#         input_seqs = [pair[0] + ['<eos>'] for pair in batch]
#         # Add the <sos> token to the beginning and the <eos> token to the end of each target sequence
#         target_seqs = [['<sos>'] + pair[1] + ['<eos>'] for pair in batch]

#         input_max_len = max(len(seq) for seq in input_seqs)
#         target_max_len = max(len(seq) for seq in target_seqs)

#         input_indices = [[input_vocab[token] for token in seq] + [input_vocab['<pad>']] * (input_max_len - len(seq)) for seq in input_seqs]
#         target_indices = [[target_vocab[token] for token in seq] + [target_vocab['<pad>']] * (target_max_len - len(seq)) for seq in target_seqs]

#         input_seq = torch.tensor(input_indices).to(device)
#         target_seq = torch.tensor(target_indices).to(device)
#         # mask = (target_seq != target_vocab['<pad>']).to(device)

#         # print(input_seq.shape)
#         # print(target_seq.shape)
#         # print(mask.sum())
#         encoder_outputs, encoder_hidden = encoder(input_seq)
#         decoder_input = target_seq[:, 0].unsqueeze(1).to(device)
#         decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, decoder_input, min(MAX_LEN, target_max_len), target_tensor = target_seq, teaching_force_ratio = 0.5)
#         loss = criterion(
#             decoder_outputs.view(-1, decoder_outputs.size(-1)),
#             target_seq[:, :MAX_LEN].reshape(-1)
#         )

#         # loss = 0
#         # for token in range(target_seq.shape[1]):
#         #     decoder_output, decoder_hidden, decoder_cell = decoder(decoder_input, decoder_hidden, decoder_cell, encoder_outputs)
#         #     # step_loss = criterion(decoder_output, target_seq[:, token])
#         #     step_loss = criterion(decoder_output[mask[:, token]], target_seq[:, token][mask[:, token]])
#         #     loss += step_loss
#         #     decoder_input = target_seq[:, token].unsqueeze(1)
#         total_loss += loss.item()
#         # total_mask += mask.sum()
#         loss.backward()
#         encoder_optimizer.step()
#         decoder_optimizer.step()

#     # average_loss = total_loss / total_mask
#     average_loss = total_loss / len(train_pairs)
#     # average_loss = total_loss / 10
#     end_time = time.time()
#     time_diff = end_time - start_time
#     print(f"Time: {time_diff:.2f}s, Epoch: {epoch+1}, Training Loss: {average_loss:.4f}")

#     encoder.eval()
#     decoder.eval()
#     with torch.no_grad():
#         total_loss = 0
#         total_mask = 0
#         counter = 0
#         for batch in val_batches[:]:
#             counter += batch_size
#             if counter % 10000 < batch_size:
#                 end_time = time.time()
#                 time_diff = end_time - start_time
#                 average_loss = total_loss / counter
#                 print(f"Time: {time_diff:.2f}s, Validation Visited {counter // 10000 * 10000} lines, Loss: {average_loss:.4f}")

#             # input_seqs = [pair[0] for pair in batch]
#             # target_seqs = [pair[1] for pair in batch]
#             # Add the <eos> token to the end of each input sequence
#             input_seqs = [pair[0] + ['<eos>'] for pair in batch]
#             # Add the <sos> token to the beginning and the <eos> token to the end of each target sequence
#             target_seqs = [['<sos>'] + pair[1] + ['<eos>'] for pair in batch]

#             input_max_len = max(len(seq) for seq in input_seqs)
#             target_max_len = max(len(seq) for seq in target_seqs)

#             input_indices = [[input_vocab[token] for token in seq] + [input_vocab['<pad>']] * (input_max_len - len(seq)) for seq in input_seqs]
#             target_indices = [[target_vocab[token] for token in seq] + [target_vocab['<pad>']] * (target_max_len - len(seq)) for seq in target_seqs]

#             input_seq = torch.tensor(input_indices).to(device)
#             target_seq = torch.tensor(target_indices).to(device)
#             # for simplicity, mask validation in the same way. Ignore all <pad>.
#             # mask = (target_seq != target_vocab['<pad>']).to(device)
#             encoder_outputs, encoder_hidden = encoder(input_seq)
#             decoder_input = torch.tensor([target_vocab["<sos>"]] * input_seq.shape[0]).unsqueeze(1).to(device)
#             decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, decoder_input, min(MAX_LEN, target_max_len), target_tensor = None)
#             loss = criterion(
#                 decoder_outputs.view(-1, decoder_outputs.size(-1)),
#                 target_seq[:, :MAX_LEN].reshape(-1)
#             )

#             # loss = 0
#             # for token in range(target_seq.shape[1]):
#             #     decoder_output, decoder_hidden, decoder_cell = decoder(decoder_input, decoder_hidden, decoder_cell, encoder_outputs)
#             #     # step_loss = criterion(decoder_output.squeeze(1), target_seq[:, token])
#             #     step_loss = criterion(decoder_output[mask[:, token]], target_seq[:, token][mask[:, token]])
#             #     loss += step_loss
#             #     decoder_input = torch.argmax(decoder_output, dim=1).unsqueeze(1)
#             total_loss += loss.item()
#             # total_mask += mask.sum()

#         # average_loss = total_loss / total_mask
#         average_loss = total_loss / len(val_pairs)
#         # average_loss = total_loss / 10
#         end_time = time.time()
#         time_diff = end_time - start_time
#         print(f"Time: {time_diff:.2f}s, Epoch: {epoch+1}, Validation Loss: {average_loss:.4f}")

In [52]:
import torch
from torchtext.vocab import Vocab
import contractions
from string import punctuation
from nltk.tokenize import word_tokenize

def clean_and_tokenize_text(text):
    text = text.lower()
    text = contractions.fix(text)

    text = ''.join([c for c in text if c not in punctuation])
    tokens = word_tokenize(text)
    #stop_words = set(stopwords.words('english'))
    #tokens = [token for token in tokens if token not in stop_words]
    #lemmatizer = WordNetLemmatizer()
    #tokens = [lemmatizer.lemmatize(token) for token in tokens]
    tokens = [token.strip() for token in tokens if token.strip()]
    return tokens

# Preprocess the input string
input_string = 'how are you'
tokenized_input = clean_and_tokenize_text(input_string) + ['<eos>']

# Convert tokens to indices using the input vocabulary
input_seqs = torch.tensor([input_vocab[token] for token in tokenized_input]).unsqueeze(0).to(device) # Shape: [batch_size, sequence_length]

# Set the model to evaluation mode
encoder.eval()
decoder.eval()

# Pass the input through the encoder
encoder_outputs, encoder_hidden = encoder(input_seqs)
# Initialize the decoder's input
decoder_input = torch.tensor([target_vocab["<sos>"]] * 1).unsqueeze(1).to(device)
# decoder_input = torch.tensor([[target_vocab['<sos>']]]).to(device)  # Use the index for the start token
decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, decoder_input, min(MAX_LEN, target_max_len), target_tensor = None)

_, predicted_indices = torch.max(decoder_outputs, dim=-1)
predicted_tokens = [target_vocab.get_itos()[token_idx.item()] for token_idx in predicted_indices[0]]
predicted_text = ' '.join(predicted_tokens)
print(predicted_text)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\weiyu\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


<sos> i am not <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos> <eos>
