In [1]:
import os
from argparse import Namespace
from collections import Counter
import json
import re
import string

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm


class Vocabulary(object):
    """Class to process text and extract vocabulary for mapping"""

    def __init__(self, token_to_idx=None):
        """
        Args:
            token_to_idx (dict): a pre-existing map of tokens to indices
        """

        if token_to_idx is None:
            token_to_idx = {}
        self._token_to_idx = token_to_idx

        self._idx_to_token = {idx: token
                              for token, idx in self._token_to_idx.items()}

    def to_serializable(self):
        """ returns a dictionary that can be serialized """
        return {'token_to_idx': self._token_to_idx}

    @classmethod
    def from_serializable(cls, contents):
        """ instantiates the Vocabulary from a serialized dictionary """
        return cls(**contents)

    def add_token(self, token):
        """Update mapping dicts based on the token.

        Args:
            token (str): the item to add into the Vocabulary
        Returns:
            index (int): the integer corresponding to the token
        """
        if token in self._token_to_idx:
            index = self._token_to_idx[token]
        else:
            index = len(self._token_to_idx)
            self._token_to_idx[token] = index
            self._idx_to_token[index] = token
        return index

    def add_many(self, tokens):
        """Add a list of tokens into the Vocabulary

        Args:
            tokens (list): a list of string tokens
        Returns:
            indices (list): a list of indices corresponding to the tokens
        """
        return [self.add_token(token) for token in tokens]

    def lookup_token(self, token):
        """Retrieve the index associated with the token

        Args:
            token (str): the token to look up
        Returns:
            index (int): the index corresponding to the token
        """
        return self._token_to_idx[token]

    def lookup_index(self, index):
        """Return the token associated with the index

        Args:
            index (int): the index to look up
        Returns:
            token (str): the token corresponding to the index
        Raises:
            KeyError: if the index is not in the Vocabulary
        """
        if index not in self._idx_to_token:
            raise KeyError("the index (%d) is not in the Vocabulary" % index)
        return self._idx_to_token[index]

    def __str__(self):
        return "<Vocabulary(size=%d)>" % len(self)

    def __len__(self):
        return len(self._token_to_idx)


class SequenceVocabulary(Vocabulary):
    def __init__(self, token_to_idx=None, unk_token="<UNK>",
                 mask_token="<MASK>", begin_seq_token="<BEGIN>",
                 end_seq_token="<END>"):

        super(SequenceVocabulary, self).__init__(token_to_idx)

        self._mask_token = mask_token
        self._unk_token = unk_token
        self._begin_seq_token = begin_seq_token
        self._end_seq_token = end_seq_token

        self.mask_index = self.add_token(self._mask_token)
        self.unk_index = self.add_token(self._unk_token)
        self.begin_seq_index = self.add_token(self._begin_seq_token)
        self.end_seq_index = self.add_token(self._end_seq_token)

    def to_serializable(self):
        contents = super(SequenceVocabulary, self).to_serializable()
        contents.update({'unk_token': self._unk_token,
                         'mask_token': self._mask_token,
                         'begin_seq_token': self._begin_seq_token,
                         'end_seq_token': self._end_seq_token})
        return contents

    def lookup_token(self, token):
        """Retrieve the index associated with the token
          or the UNK index if token isn't present.

        Args:
            token (str): the token to look up
        Returns:
            index (int): the index corresponding to the token
        Notes:
            `unk_index` needs to be >=0 (having been added into the Vocabulary)
              for the UNK functionality
        """
        if self.unk_index >= 0:
            return self._token_to_idx.get(token, self.unk_index)
        else:
            return self._token_to_idx[token]


class SurnameVectorizer(object):
    """ The Vectorizer which coordinates the Vocabularies and puts them to use"""

    def __init__(self, char_vocab, nationality_vocab):
        """
        Args:
            char_vocab (SequenceVocabulary): maps words to integers
            nationality_vocab (Vocabulary): maps nationalities to integers
        """
        self.char_vocab = char_vocab
        self.nationality_vocab = nationality_vocab

    def vectorize(self, surname, vector_length=-1):
        """Vectorize a surname into a vector of observations and targets

        The outputs are the vectorized surname split into two vectors:
            surname[:-1] and surname[1:]
        At each timestep, the first vector is the observation and the second vector is the target.

        Args:
            surname (str): the surname to be vectorized
            vector_length (int): an argument for forcing the length of index vector
        Returns:
            a tuple: (from_vector, to_vector)
            from_vector (numpy.ndarray): the observation vector
            to_vector (numpy.ndarray): the target prediction vector
        """
        indices = [self.char_vocab.begin_seq_index]
        indices.extend(self.char_vocab.lookup_token(token) for token in surname)
        indices.append(self.char_vocab.end_seq_index)

        if vector_length < 0:
            vector_length = len(indices) - 1

        from_vector = np.zeros(vector_length, dtype=np.int64)
        from_indices = indices[:-1]
        from_vector[:len(from_indices)] = from_indices
        from_vector[len(from_indices):] = self.char_vocab.mask_index

        to_vector = np.zeros(vector_length, dtype=np.int64)
        to_indices = indices[1:]
        to_vector[:len(to_indices)] = to_indices
        to_vector[len(to_indices):] = self.char_vocab.mask_index

        return from_vector, to_vector

    @classmethod
    def from_dataframe(cls, surname_df):
        """Instantiate the vectorizer from the dataset dataframe

        Args:
            surname_df (pandas.DataFrame): the surname dataset
        Returns:
            an instance of the SurnameVectorizer
        """
        char_vocab = SequenceVocabulary()
        nationality_vocab = Vocabulary()

        for index, row in surname_df.iterrows():
            for char in row.surname:
                char_vocab.add_token(char)
            nationality_vocab.add_token(row.nationality)

        return cls(char_vocab, nationality_vocab)

    @classmethod
    def from_serializable(cls, contents):
        """Instantiate the vectorizer from saved contents

        Args:
            contents (dict): a dict holding two vocabularies for this vectorizer
                This dictionary is created using `vectorizer.to_serializable()`
        Returns:
            an instance of SurnameVectorizer
        """
        char_vocab = SequenceVocabulary.from_serializable(contents['char_vocab'])
        nat_vocab = Vocabulary.from_serializable(contents['nationality_vocab'])

        return cls(char_vocab=char_vocab, nationality_vocab=nat_vocab)

    def to_serializable(self):
        """ Returns the serializable contents """
        return {'char_vocab': self.char_vocab.to_serializable(),
                'nationality_vocab': self.nationality_vocab.to_serializable()}



In [2]:
class SurnameDataset(Dataset):
    def __init__(self, surname_df, vectorizer):
        """
        Args:
            surname_df (pandas.DataFrame): the dataset
            vectorizer (SurnameVectorizer): vectorizer instatiated from dataset
        """
        self.surname_df = surname_df
        self._vectorizer = vectorizer

        self._max_seq_length = max(map(len, self.surname_df.surname)) + 2

        self.train_df = self.surname_df[self.surname_df.split == 'train']
        self.train_size = len(self.train_df)

        self.val_df = self.surname_df[self.surname_df.split == 'val']
        self.validation_size = len(self.val_df)

        self.test_df = self.surname_df[self.surname_df.split == 'test']
        self.test_size = len(self.test_df)

        self._lookup_dict = {'train': (self.train_df, self.train_size),
                             'val': (self.val_df, self.validation_size),
                             'test': (self.test_df, self.test_size)}

        self.set_split('train')

    @classmethod
    def load_dataset_and_make_vectorizer(cls, surname_csv):
        """Load dataset and make a new vectorizer from scratch

        Args:
            surname_csv (str): location of the dataset
        Returns:
            an instance of SurnameDataset
        """

        surname_df = pd.read_csv(surname_csv)
        return cls(surname_df, SurnameVectorizer.from_dataframe(surname_df))

    @classmethod
    def load_dataset_and_load_vectorizer(cls, surname_csv, vectorizer_filepath):
        """Load dataset and the corresponding vectorizer.
        Used in the case in the vectorizer has been cached for re-use

        Args:
            surname_csv (str): location of the dataset
            vectorizer_filepath (str): location of the saved vectorizer
        Returns:
            an instance of SurnameDataset
        """
        surname_df = pd.read_csv(surname_csv)
        vectorizer = cls.load_vectorizer_only(vectorizer_filepath)
        return cls(surname_df, vectorizer)

    @staticmethod
    def load_vectorizer_only(vectorizer_filepath):
        """a static method for loading the vectorizer from file

        Args:
            vectorizer_filepath (str): the location of the serialized vectorizer
        Returns:
            an instance of SurnameVectorizer
        """
        with open(vectorizer_filepath) as fp:
            return SurnameVectorizer.from_serializable(json.load(fp))

    def save_vectorizer(self, vectorizer_filepath):
        """saves the vectorizer to disk using json

        Args:
            vectorizer_filepath (str): the location to save the vectorizer
        """
        with open(vectorizer_filepath, "w") as fp:
            json.dump(self._vectorizer.to_serializable(), fp)

    def get_vectorizer(self):
        """ returns the vectorizer """
        return self._vectorizer

    def set_split(self, split="train"):
        self._target_split = split
        self._target_df, self._target_size = self._lookup_dict[split]

    def __len__(self):
        return self._target_size

    def __getitem__(self, index):
        """the primary entry point method for PyTorch datasets

        Args:
            index (int): the index to the data point
        Returns:
            a dictionary holding the data point: (x_data, y_target, class_index)
        """
        row = self._target_df.iloc[index]

        from_vector, to_vector = \
            self._vectorizer.vectorize(row.surname, self._max_seq_length)

        nationality_index = \
            self._vectorizer.nationality_vocab.lookup_token(row.nationality)

        return {'x_data': from_vector,
                'y_target': to_vector,
                'class_index': nationality_index}

    def get_num_batches(self, batch_size):
        """Given a batch size, return the number of batches in the dataset

        Args:
            batch_size (int)
        Returns:
            number of batches in the dataset
        """
        return len(self) // batch_size


def generate_batches(dataset, batch_size, shuffle=True,
                     drop_last=True, device="cpu"):
    """
    A generator function which wraps the PyTorch DataLoader. It will
      ensure each tensor is on the write device location.
    """
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size,
                            shuffle=shuffle, drop_last=drop_last)

    for data_dict in dataloader:
        out_data_dict = {}
        for name, tensor in data_dict.items():
            out_data_dict[name] = data_dict[name].to(device)
        yield out_data_dict


class SurnameGenerationModel(nn.Module):
    def __init__(self, char_embedding_size, char_vocab_size, num_nationalities,
                 rnn_hidden_size, batch_first=True, padding_idx=0, dropout_p=0.5):
        """
        Args:
            char_embedding_size (int): The size of the character embeddings
            char_vocab_size (int): The number of characters to embed
            num_nationalities (int): The size of the prediction vector
            rnn_hidden_size (int): The size of the RNN's hidden state
            batch_first (bool): Informs whether the input tensors will
                have batch or the sequence on the 0th dimension
            padding_idx (int): The index for the tensor padding;
                see torch.nn.Embedding
            dropout_p (float): the probability of zeroing activations using
                the dropout method.  higher means more likely to zero.
        """
        super(SurnameGenerationModel, self).__init__()

        self.char_emb = nn.Embedding(num_embeddings=char_vocab_size,
                                     embedding_dim=char_embedding_size,
                                     padding_idx=padding_idx)

        self.nation_emb = nn.Embedding(num_embeddings=num_nationalities,
                                       embedding_dim=rnn_hidden_size)

        self.rnn = nn.GRU(input_size=char_embedding_size,
                          hidden_size=rnn_hidden_size,
                          batch_first=batch_first)

        self.fc = nn.Linear(in_features=rnn_hidden_size,
                            out_features=char_vocab_size)

        self._dropout_p = dropout_p

    def forward(self, x_in, nationality_index, apply_softmax=False):
        """The forward pass of the model

        Args:
            x_in (torch.Tensor): an input data tensor.
                x_in.shape should be (batch, max_seq_size)
            nationality_index (torch.Tensor): The index of the nationality for each data point
                Used to initialize the hidden state of the RNN
            apply_softmax (bool): a flag for the softmax activation
                should be false if used with the Cross Entropy losses
        Returns:
            the resulting tensor. tensor.shape should be (batch, char_vocab_size)
        """
        x_embedded = self.char_emb(x_in)

        # hidden_size: (num_layers * num_directions, batch_size, rnn_hidden_size)
        nationality_embedded = self.nation_emb(nationality_index).unsqueeze(0)

        y_out, _ = self.rnn(x_embedded, nationality_embedded)

        batch_size, seq_size, feat_size = y_out.shape
        y_out = y_out.contiguous().view(batch_size * seq_size, feat_size)

        y_out = self.fc(F.dropout(y_out, p=self._dropout_p))

        if apply_softmax:
            y_out = F.softmax(y_out, dim=1)

        new_feat_size = y_out.shape[-1]
        y_out = y_out.view(batch_size, seq_size, new_feat_size)

        return y_out


def sample_from_model(model, vectorizer, nationalities, sample_size=20,
                      temperature=1.0):
    """Sample a sequence of indices from the model

    Args:
        model (SurnameGenerationModel): the trained model
        vectorizer (SurnameVectorizer): the corresponding vectorizer
        nationalities (list): a list of integers representing nationalities
        sample_size (int): the max length of the samples
        temperature (float): accentuates or flattens
            the distribution.
            0.0 < temperature < 1.0 will make it peakier.
            temperature > 1.0 will make it more uniform
    Returns:
        indices (torch.Tensor): the matrix of indices;
        shape = (num_samples, sample_size)
    """
    num_samples = len(nationalities)
    begin_seq_index = [vectorizer.char_vocab.begin_seq_index
                       for _ in range(num_samples)]
    begin_seq_index = torch.tensor(begin_seq_index,
                                   dtype=torch.int64).unsqueeze(dim=1)
    indices = [begin_seq_index]
    nationality_indices = torch.tensor(nationalities, dtype=torch.int64).unsqueeze(dim=0)
    h_t = model.nation_emb(nationality_indices)

    for time_step in range(sample_size):
        x_t = indices[time_step]
        x_emb_t = model.char_emb(x_t)
        rnn_out_t, h_t = model.rnn(x_emb_t, h_t)
        prediction_vector = model.fc(rnn_out_t.squeeze(dim=1))
        probability_vector = F.softmax(prediction_vector / temperature, dim=1)
        indices.append(torch.multinomial(probability_vector, num_samples=1))
    indices = torch.stack(indices).squeeze().permute(1, 0)
    return indices


def decode_samples(sampled_indices, vectorizer):
    """Transform indices into the string form of a surname

    Args:
        sampled_indices (torch.Tensor): the inidces from `sample_from_model`
        vectorizer (SurnameVectorizer): the corresponding vectorizer
    """
    decoded_surnames = []
    vocab = vectorizer.char_vocab

    for sample_index in range(sampled_indices.shape[0]):
        surname = ""
        for time_step in range(sampled_indices.shape[1]):
            sample_item = sampled_indices[sample_index, time_step].item()
            if sample_item == vocab.begin_seq_index:
                continue
            elif sample_item == vocab.end_seq_index:
                break
            else:
                surname += vocab.lookup_index(sample_item)
        decoded_surnames.append(surname)
    return decoded_surnames


def make_train_state(args):
    return {'stop_early': False,
            'early_stopping_step': 0,
            'early_stopping_best_val': 1e8,
            'learning_rate': args.learning_rate,
            'epoch_index': 0,
            'train_loss': [],
            'train_acc': [],
            'val_loss': [],
            'val_acc': [],
            'test_loss': -1,
            'test_acc': -1,
            'model_filename': args.model_state_file}


def update_train_state(args, model, train_state):
    """Handle the training state updates.
    Components:
     - Early Stopping: Prevent overfitting.
     - Model Checkpoint: Model is saved if the model is better

    :param args: main arguments
    :param model: model to train
    :param train_state: a dictionary representing the training state values
    :returns:
        a new train_state
    """

    # Save one model at least
    if train_state['epoch_index'] == 0:
        torch.save(model.state_dict(), train_state['model_filename'])
        train_state['stop_early'] = False

    # Save model if performance improved
    elif train_state['epoch_index'] >= 1:
        loss_tm1, loss_t = train_state['val_loss'][-2:]

        # If loss worsened
        if loss_t >= loss_tm1:
            # Update step
            train_state['early_stopping_step'] += 1
        # Loss decreased
        else:
            # Save the best model
            if loss_t < train_state['early_stopping_best_val']:
                torch.save(model.state_dict(), train_state['model_filename'])
                train_state['early_stopping_best_val'] = loss_t

            # Reset early stopping step
            train_state['early_stopping_step'] = 0

        # Stop early ?
        train_state['stop_early'] = \
            train_state['early_stopping_step'] >= args.early_stopping_criteria

    return train_state


def normalize_sizes(y_pred, y_true):
    """Normalize tensor sizes

    Args:
        y_pred (torch.Tensor): the output of the model
            If a 3-dimensional tensor, reshapes to a matrix
        y_true (torch.Tensor): the target predictions
            If a matrix, reshapes to be a vector
    """
    if len(y_pred.size()) == 3:
        y_pred = y_pred.contiguous().view(-1, y_pred.size(2))
    if len(y_true.size()) == 2:
        y_true = y_true.contiguous().view(-1)
    return y_pred, y_true


def compute_accuracy(y_pred, y_true, mask_index):
    y_pred, y_true = normalize_sizes(y_pred, y_true)

    _, y_pred_indices = y_pred.max(dim=1)

    correct_indices = torch.eq(y_pred_indices, y_true).float()
    valid_indices = torch.ne(y_true, mask_index).float()

    n_correct = (correct_indices * valid_indices).sum().item()
    n_valid = valid_indices.sum().item()

    return n_correct / n_valid * 100


def sequence_loss(y_pred, y_true, mask_index):
    y_pred, y_true = normalize_sizes(y_pred, y_true)
    return F.cross_entropy(y_pred, y_true, ignore_index=mask_index)

def set_seed_everywhere(seed, cuda):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed_all(seed)

def handle_dirs(dirpath):
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)


args = Namespace(
    # Data and Path information
    surname_csv="data/surnames/surnames_with_splits.csv",
    vectorizer_file="vectorizer.json",
    model_state_file="model.pth",
    save_dir="model_storage/ch7/model2_conditioned_surname_generation",
    # Model hyper parameters
    char_embedding_size=32,
    rnn_hidden_size=32,
    # Training hyper parameters
    seed=1337,
    learning_rate=0.001,
    batch_size=128,
    num_epochs=50,
    early_stopping_criteria=5,
    # Runtime options
    catch_keyboard_interrupt=True,
    cuda=True,
    expand_filepaths_to_save_dir=True,
    reload_from_files=False,
)

if args.expand_filepaths_to_save_dir:
    args.vectorizer_file = os.path.join(args.save_dir,
                                        args.vectorizer_file)

    args.model_state_file = os.path.join(args.save_dir,
                                         args.model_state_file)

    print("Expanded filepaths: ")
    print("\t{}".format(args.vectorizer_file))
    print("\t{}".format(args.model_state_file))

# Check CUDA
if not torch.cuda.is_available():
    args.cuda = False

args.device = torch.device("cuda" if args.cuda else "cpu")

print("Using CUDA: {}".format(args.cuda))

# Set seed for reproducibility
set_seed_everywhere(args.seed, args.cuda)

# handle dirs
handle_dirs(args.save_dir)

if args.reload_from_files:
    # training from a checkpoint
    dataset = SurnameDataset.load_dataset_and_load_vectorizer(args.surname_csv,
                                                              args.vectorizer_file)
else:
    # create dataset and vectorizer
    dataset = SurnameDataset.load_dataset_and_make_vectorizer(args.surname_csv)
    dataset.save_vectorizer(args.vectorizer_file)

vectorizer = dataset.get_vectorizer()

model = SurnameGenerationModel(char_embedding_size=args.char_embedding_size,
                               char_vocab_size=len(vectorizer.char_vocab),
                               num_nationalities=len(vectorizer.nationality_vocab),
                               rnn_hidden_size=args.rnn_hidden_size,
                               padding_idx=vectorizer.char_vocab.mask_index,
                               dropout_p=0.5)

mask_index = vectorizer.char_vocab.mask_index

model = model.to(args.device)

optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                 mode='min', factor=0.5,
                                                 patience=1)
train_state = make_train_state(args)

epoch_bar = tqdm(desc='training routine',
                          total=args.num_epochs,
                          position=0)

dataset.set_split('train')
train_bar = tqdm(desc='split=train',
                          total=dataset.get_num_batches(args.batch_size),
                          position=1,
                          leave=True)
dataset.set_split('val')
val_bar = tqdm(desc='split=val',
                        total=dataset.get_num_batches(args.batch_size),
                        position=1,
                        leave=True)

try:
    for epoch_index in range(args.num_epochs):
        print(f"\nEpoch [{epoch_index + 1}/{args.num_epochs}]")
        print("-" * 50)
        train_state['epoch_index'] = epoch_index

        # Iterate over training dataset

        # setup: batch generator, set loss and acc to 0, set train mode on
        dataset.set_split('train')
        batch_generator = generate_batches(dataset,
                                           batch_size=args.batch_size,
                                           device=args.device)
        running_loss = 0.0
        running_acc = 0.0
        model.train()

        for batch_index, batch_dict in enumerate(batch_generator):
            # the training routine is these 5 steps:

            # --------------------------------------
            # step 1. zero the gradients
            optimizer.zero_grad()

            # step 2. compute the output
            y_pred = model(x_in=batch_dict['x_data'],
                           nationality_index=batch_dict['class_index'])

            # step 3. compute the loss
            loss = sequence_loss(y_pred, batch_dict['y_target'], mask_index)

            # step 4. use loss to produce gradients
            loss.backward()

            # step 5. use optimizer to take gradient step
            optimizer.step()
            # -----------------------------------------
            # compute the  running loss and running accuracy
            running_loss += (loss.item() - running_loss) / (batch_index + 1)
            acc_t = compute_accuracy(y_pred, batch_dict['y_target'], mask_index)
            running_acc += (acc_t - running_acc) / (batch_index + 1)

            # update bar
            if batch_index % 25 == 0:
                train_bar.set_postfix(loss=running_loss,
                                      acc=running_acc,
                                      epoch=epoch_index)
            train_bar.update()

        train_state['train_loss'].append(running_loss)
        train_state['train_acc'].append(running_acc)

        # Iterate over val dataset

        # setup: batch generator, set loss and acc to 0; set eval mode on
        dataset.set_split('val')
        batch_generator = generate_batches(dataset,
                                           batch_size=args.batch_size,
                                           device=args.device)
        running_loss = 0.
        running_acc = 0.
        model.eval()

        for batch_index, batch_dict in enumerate(batch_generator):
            # compute the output
            y_pred = model(x_in=batch_dict['x_data'],
                           nationality_index=batch_dict['class_index'])

            # step 3. compute the loss
            loss = sequence_loss(y_pred, batch_dict['y_target'], mask_index)

            # compute the  running loss and running accuracy
            running_loss += (loss.item() - running_loss) / (batch_index + 1)
            acc_t = compute_accuracy(y_pred, batch_dict['y_target'], mask_index)
            running_acc += (acc_t - running_acc) / (batch_index + 1)

            # Update bar
            if batch_index % 25 == 0:
                val_bar.set_postfix(loss=running_loss, acc=running_acc,
                                    epoch=epoch_index)
            val_bar.update()

        train_state['val_loss'].append(running_loss)
        train_state['val_acc'].append(running_acc)

        train_state = update_train_state(args=args, model=model,
                                         train_state=train_state)

        scheduler.step(train_state['val_loss'][-1])

        if train_state['stop_early']:
            break

        # move model to cpu for sampling

        nationalities = np.random.choice(np.arange(len(vectorizer.nationality_vocab)), replace=True, size=2)
        model = model.cpu()
        sampled_surnames = decode_samples(
            sample_from_model(model, vectorizer, nationalities=nationalities),
            vectorizer)

        sample1 = "{}->{}".format(vectorizer.nationality_vocab.lookup_index(nationalities[0]),
                                  sampled_surnames[0])
        sample2 = "{}->{}".format(vectorizer.nationality_vocab.lookup_index(nationalities[1]),
                                  sampled_surnames[1])
        if batch_index % 25 == 0:
            epoch_bar.set_postfix(sample1=sample1,
                                  sample2=sample2)
        # move model back to whichever device it should be on
        model = model.to(args.device)

        train_bar.n = 0
        val_bar.n = 0
        epoch_bar.update()

except KeyboardInterrupt:
    print("Exiting loop")


Expanded filepaths: 
	model_storage/ch7/model2_conditioned_surname_generation\vectorizer.json
	model_storage/ch7/model2_conditioned_surname_generation\model.pth
Using CUDA: False


training routine:   0%|                                                                         | 0/50 [00:00<?, ?it/s]
split=train:   0%|                                                                              | 0/60 [00:00<?, ?it/s][A
split=val:   0%|                                                                                | 0/12 [00:00<?, ?it/s][A
split=train:   0%|                                               | 0/60 [00:00<?, ?it/s, acc=0.815, epoch=0, loss=4.54][A
split=train:   7%|██▌                                    | 4/60 [00:00<00:01, 31.25it/s, acc=0.815, epoch=0, loss=4.54][A


Epoch [1/50]
--------------------------------------------------



split=train:  13%|█████▏                                 | 8/60 [00:00<00:01, 34.07it/s, acc=0.815, epoch=0, loss=4.54][A
split=train:  22%|████████▏                             | 13/60 [00:00<00:01, 37.29it/s, acc=0.815, epoch=0, loss=4.54][A
split=train:  28%|██████████▊                           | 17/60 [00:00<00:01, 37.25it/s, acc=0.815, epoch=0, loss=4.54][A
split=train:  35%|█████████████▎                        | 21/60 [00:00<00:01, 37.89it/s, acc=0.815, epoch=0, loss=4.54][A
split=train:  42%|████████████████▎                      | 25/60 [00:00<00:00, 37.89it/s, acc=3.37, epoch=0, loss=4.38][A
split=train:  43%|████████████████▉                      | 26/60 [00:00<00:00, 38.67it/s, acc=3.37, epoch=0, loss=4.38][A
split=train:  50%|███████████████████▌                   | 30/60 [00:00<00:00, 37.94it/s, acc=3.37, epoch=0, loss=4.38][A
split=train:  57%|██████████████████████                 | 34/60 [00:00<00:00, 38.10it/s, acc=3.37, epoch=0, loss=4.38][A
split=train:  6


Epoch [2/50]
--------------------------------------------------



split=train:  42%|█████████████████                        | 25/60 [00:02<00:00, 37.11it/s, acc=15, epoch=1, loss=3.29][A
split=train:  83%|████████████████████████████████▌      | 50/60 [00:03<00:00, 37.11it/s, acc=15.5, epoch=1, loss=3.23][A
split=val:   0%|                                          | 0/12 [00:03<00:02,  5.32it/s, acc=18.5, epoch=1, loss=3.11][A
training routine:   4%|██▌                                                              | 2/50 [00:03<01:29,  1.86s/it][A
split=train:   0%|                                        | 0/60 [00:03<00:01, 37.11it/s, acc=18.8, epoch=2, loss=3.08][A


Epoch [3/50]
--------------------------------------------------



split=train:  42%|█████████████████                        | 25/60 [00:04<00:00, 37.11it/s, acc=18, epoch=2, loss=3.06][A
split=train:  83%|████████████████████████████████▌      | 50/60 [00:05<00:00, 37.11it/s, acc=18.2, epoch=2, loss=3.03][A
training routine:   6%|███▉                                                             | 3/50 [00:05<01:25,  1.82s/it][A
split=train:   0%|                                        | 0/60 [00:05<00:01, 37.11it/s, acc=19.5, epoch=3, loss=2.99][A


Epoch [4/50]
--------------------------------------------------



split=train:  42%|████████████████▎                      | 25/60 [00:06<00:00, 37.11it/s, acc=19.1, epoch=3, loss=2.93][A
split=train:  83%|████████████████████████████████▌      | 50/60 [00:06<00:00, 37.11it/s, acc=19.7, epoch=3, loss=2.91][A
training routine:   8%|█████▏                                                           | 4/50 [00:07<01:21,  1.77s/it][A
split=train:   0%|                                        | 0/60 [00:07<00:01, 37.11it/s, acc=20.8, epoch=4, loss=2.87][A


Epoch [5/50]
--------------------------------------------------



split=train:  42%|████████████████▎                      | 25/60 [00:07<00:00, 37.11it/s, acc=21.1, epoch=4, loss=2.83][A
split=train:  83%|█████████████████████████████████▎      | 50/60 [00:08<00:00, 37.11it/s, acc=21.5, epoch=4, loss=2.8][A
training routine:  10%|██████▌                                                          | 5/50 [00:08<01:18,  1.74s/it][A
split=train:   0%|                                        | 0/60 [00:08<00:01, 37.11it/s, acc=21.9, epoch=5, loss=2.77][A


Epoch [6/50]
--------------------------------------------------



split=train:  42%|████████████████▎                      | 25/60 [00:09<00:00, 37.11it/s, acc=22.2, epoch=5, loss=2.75][A
split=train:  83%|████████████████████████████████▌      | 50/60 [00:10<00:00, 37.11it/s, acc=22.7, epoch=5, loss=2.74][A
training routine:  12%|███████▊                                                         | 6/50 [00:10<01:17,  1.76s/it][A
split=train:   0%|                                        | 0/60 [00:10<00:01, 37.11it/s, acc=22.5, epoch=6, loss=2.73][A


Epoch [7/50]
--------------------------------------------------



split=train:  42%|████████████████▋                       | 25/60 [00:11<00:00, 37.11it/s, acc=23.3, epoch=6, loss=2.7][A
split=train:  83%|█████████████████████████████████▎      | 50/60 [00:12<00:00, 37.11it/s, acc=23.4, epoch=6, loss=2.7][A
training routine:  14%|█████████                                                        | 7/50 [00:12<01:16,  1.78s/it][A
split=train:   0%|                                        | 0/60 [00:12<00:01, 37.11it/s, acc=23.9, epoch=7, loss=2.65][A


Epoch [8/50]
--------------------------------------------------



split=train:  42%|█████████████████                        | 25/60 [00:13<00:00, 37.11it/s, acc=24, epoch=7, loss=2.67][A
split=train:  83%|████████████████████████████████▌      | 50/60 [00:13<00:00, 37.11it/s, acc=24.2, epoch=7, loss=2.66][A
training routine:  16%|██████████▍                                                      | 8/50 [00:14<01:15,  1.79s/it][A
split=train:   0%|                                        | 0/60 [00:14<00:01, 37.11it/s, acc=23.7, epoch=8, loss=2.69][A


Epoch [9/50]
--------------------------------------------------



split=train:  42%|████████████████▎                      | 25/60 [00:14<00:00, 37.11it/s, acc=24.6, epoch=8, loss=2.63][A
split=train:  83%|████████████████████████████████▌      | 50/60 [00:15<00:00, 37.11it/s, acc=24.6, epoch=8, loss=2.63][A
training routine:  18%|███████████▋                                                     | 9/50 [00:15<01:12,  1.76s/it][A
split=train:   0%|                                        | 0/60 [00:16<00:01, 37.11it/s, acc=24.9, epoch=9, loss=2.59][A


Epoch [10/50]
--------------------------------------------------



split=train:  42%|████████████████▎                      | 25/60 [00:16<00:00, 37.11it/s, acc=24.7, epoch=9, loss=2.61][A
split=train:  83%|████████████████████████████████▌      | 50/60 [00:17<00:00, 37.11it/s, acc=24.9, epoch=9, loss=2.61][A
training routine:  20%|████████████▊                                                   | 10/50 [00:17<01:09,  1.74s/it][A
split=train:   0%|                                         | 0/60 [00:17<00:01, 37.11it/s, acc=26, epoch=10, loss=2.61][A


Epoch [11/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:18<00:00, 37.11it/s, acc=25.5, epoch=10, loss=2.59][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:18<00:00, 37.11it/s, acc=25.4, epoch=10, loss=2.59][A
training routine:  22%|██████████████                                                  | 11/50 [00:19<01:07,  1.73s/it][A
split=train:   0%|                                       | 0/60 [00:19<00:01, 37.11it/s, acc=23.1, epoch=11, loss=2.64][A


Epoch [12/50]
--------------------------------------------------



split=train:  40%|███████████████▏                      | 24/60 [00:20<00:00, 37.11it/s, acc=23.1, epoch=11, loss=2.64][A
split=val:   0%|                                          | 0/12 [00:20<00:03,  3.09it/s, acc=25.9, epoch=10, loss=2.6][A
split=train:  42%|███████████████▊                      | 25/60 [00:20<00:00, 37.11it/s, acc=25.5, epoch=11, loss=2.58][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:20<00:00, 37.11it/s, acc=25.7, epoch=11, loss=2.58][A
split=train: 100%|██████████████████████████████████████| 60/60 [00:20<00:00,  1.87s/it, acc=25.7, epoch=11, loss=2.58][A
split=val:   0%|                                         | 0/12 [00:20<00:03,  3.09it/s, acc=27.7, epoch=11, loss=2.56][A
training routine:  24%|███████████████▎                                                | 12/50 [00:21<01:05,  1.72s/it][A
split=train:   0%|                                       | 0/60 [00:21<01:52,  1.87s/it, acc=23.1, epoch=12, loss=2.65][A


Epoch [13/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:21<01:05,  1.87s/it, acc=26.1, epoch=12, loss=2.56][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:22<00:18,  1.87s/it, acc=26.1, epoch=12, loss=2.56][A
training routine:  26%|████████████████▋                                               | 13/50 [00:23<01:05,  1.78s/it][A
split=train:   0%|                                       | 0/60 [00:23<01:52,  1.87s/it, acc=25.5, epoch=13, loss=2.55][A


Epoch [14/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:23<01:05,  1.87s/it, acc=26.2, epoch=13, loss=2.55][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:24<00:18,  1.87s/it, acc=26.4, epoch=13, loss=2.55][A
training routine:  28%|█████████████████▉                                              | 14/50 [00:24<01:03,  1.76s/it][A
split=train:   0%|                                       | 0/60 [00:24<01:52,  1.87s/it, acc=26.6, epoch=14, loss=2.55][A


Epoch [15/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:25<01:05,  1.87s/it, acc=26.6, epoch=14, loss=2.53][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:25<00:18,  1.87s/it, acc=26.4, epoch=14, loss=2.54][A
training routine:  30%|███████████████████▏                                            | 15/50 [00:26<01:00,  1.73s/it][A
split=train:   0%|                                       | 0/60 [00:26<01:52,  1.87s/it, acc=26.2, epoch=15, loss=2.51][A


Epoch [16/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:27<01:05,  1.87s/it, acc=26.3, epoch=15, loss=2.53][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:27<00:18,  1.87s/it, acc=26.6, epoch=15, loss=2.53][A
training routine:  32%|████████████████████▍                                           | 16/50 [00:28<00:58,  1.71s/it][A
split=train:   0%|                                       | 0/60 [00:28<01:52,  1.87s/it, acc=28.5, epoch=16, loss=2.46][A


Epoch [17/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:28<01:05,  1.87s/it, acc=26.5, epoch=16, loss=2.53][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:29<00:18,  1.87s/it, acc=26.8, epoch=16, loss=2.52][A
training routine:  34%|█████████████████████▊                                          | 17/50 [00:29<00:55,  1.69s/it][A
split=train:   0%|                                         | 0/60 [00:29<01:52,  1.87s/it, acc=27, epoch=17, loss=2.51][A


Epoch [18/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:30<01:05,  1.87s/it, acc=26.6, epoch=17, loss=2.53][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:30<00:18,  1.87s/it, acc=26.9, epoch=17, loss=2.51][A
training routine:  36%|███████████████████████                                         | 18/50 [00:31<00:53,  1.68s/it][A
split=train:   0%|                                       | 0/60 [00:31<01:52,  1.87s/it, acc=24.7, epoch=18, loss=2.57][A


Epoch [19/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:31<01:05,  1.87s/it, acc=26.9, epoch=18, loss=2.51][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:32<00:18,  1.87s/it, acc=27.1, epoch=18, loss=2.51][A
training routine:  38%|████████████████████████▎                                       | 19/50 [00:32<00:51,  1.67s/it][A
split=train:   0%|                                       | 0/60 [00:33<01:52,  1.87s/it, acc=26.1, epoch=19, loss=2.57][A


Epoch [20/50]
--------------------------------------------------



split=train:  42%|████████████████▎                      | 25/60 [00:33<01:05,  1.87s/it, acc=27.6, epoch=19, loss=2.5][A
split=train:  83%|████████████████████████████████▌      | 50/60 [00:34<00:18,  1.87s/it, acc=27.5, epoch=19, loss=2.5][A
training routine:  40%|█████████████████████████▌                                      | 20/50 [00:34<00:49,  1.66s/it][A
split=train:   0%|                                       | 0/60 [00:34<01:52,  1.87s/it, acc=25.5, epoch=20, loss=2.53][A


Epoch [21/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:35<01:05,  1.87s/it, acc=27.4, epoch=20, loss=2.49][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:35<00:18,  1.87s/it, acc=27.5, epoch=20, loss=2.49][A
training routine:  42%|██████████████████████████▉                                     | 21/50 [00:36<00:48,  1.66s/it][A
split=train:   0%|                                       | 0/60 [00:36<01:52,  1.87s/it, acc=28.3, epoch=21, loss=2.43][A


Epoch [22/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:36<01:05,  1.87s/it, acc=27.6, epoch=21, loss=2.49][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:37<00:18,  1.87s/it, acc=27.5, epoch=21, loss=2.49][A
training routine:  44%|████████████████████████████▏                                   | 22/50 [00:38<00:46,  1.68s/it][A
split=train:   0%|                                       | 0/60 [00:38<01:52,  1.87s/it, acc=28.5, epoch=22, loss=2.45][A


Epoch [23/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:38<01:05,  1.87s/it, acc=27.5, epoch=22, loss=2.48][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:39<00:18,  1.87s/it, acc=27.5, epoch=22, loss=2.48][A
training routine:  46%|█████████████████████████████▍                                  | 23/50 [00:39<00:46,  1.72s/it][A
split=train:   0%|                                       | 0/60 [00:39<01:52,  1.87s/it, acc=29.1, epoch=23, loss=2.43][A


Epoch [24/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:40<01:05,  1.87s/it, acc=27.7, epoch=23, loss=2.48][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:41<00:18,  1.87s/it, acc=27.6, epoch=23, loss=2.47][A
training routine:  48%|██████████████████████████████▋                                 | 24/50 [00:41<00:44,  1.70s/it][A
split=train:   0%|                                       | 0/60 [00:41<01:52,  1.87s/it, acc=27.9, epoch=24, loss=2.51][A


Epoch [25/50]
--------------------------------------------------



split=train:  42%|████████████████▋                       | 25/60 [00:42<01:05,  1.87s/it, acc=28, epoch=24, loss=2.47][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:42<00:18,  1.87s/it, acc=27.8, epoch=24, loss=2.47][A
training routine:  50%|████████████████████████████████                                | 25/50 [00:43<00:42,  1.71s/it][A
split=train:   0%|                                       | 0/60 [00:43<01:52,  1.87s/it, acc=27.7, epoch=25, loss=2.48][A


Epoch [26/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:43<01:05,  1.87s/it, acc=27.7, epoch=25, loss=2.47][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:44<00:18,  1.87s/it, acc=27.8, epoch=25, loss=2.47][A
training routine:  52%|█████████████████████████████████▎                              | 26/50 [00:45<00:42,  1.76s/it][A
split=train:   0%|                                         | 0/60 [00:45<01:52,  1.87s/it, acc=28, epoch=26, loss=2.45][A


Epoch [27/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:45<01:05,  1.87s/it, acc=27.9, epoch=26, loss=2.46][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:46<00:18,  1.87s/it, acc=27.9, epoch=26, loss=2.46][A
training routine:  54%|██████████████████████████████████▌                             | 27/50 [00:46<00:40,  1.78s/it][A
split=train:   0%|                                         | 0/60 [00:46<01:52,  1.87s/it, acc=28, epoch=27, loss=2.45][A


Epoch [28/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:47<01:05,  1.87s/it, acc=27.9, epoch=27, loss=2.47][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:48<00:18,  1.87s/it, acc=27.9, epoch=27, loss=2.46][A
training routine:  56%|███████████████████████████████████▊                            | 28/50 [00:48<00:38,  1.74s/it][A
split=train:   0%|                                       | 0/60 [00:48<01:52,  1.87s/it, acc=30.4, epoch=28, loss=2.43][A


Epoch [29/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:49<01:05,  1.87s/it, acc=28.5, epoch=28, loss=2.45][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:49<00:18,  1.87s/it, acc=28.4, epoch=28, loss=2.45][A
training routine:  58%|█████████████████████████████████████                           | 29/50 [00:50<00:35,  1.71s/it][A
split=train:   0%|                                       | 0/60 [00:50<01:52,  1.87s/it, acc=29.7, epoch=29, loss=2.43][A


Epoch [30/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:50<01:05,  1.87s/it, acc=28.1, epoch=29, loss=2.46][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:51<00:18,  1.87s/it, acc=28.4, epoch=29, loss=2.45][A
training routine:  60%|██████████████████████████████████████▍                         | 30/50 [00:51<00:33,  1.69s/it][A
split=train:   0%|                                       | 0/60 [00:51<01:52,  1.87s/it, acc=27.8, epoch=30, loss=2.43][A


Epoch [31/50]
--------------------------------------------------



split=train:  42%|████████████████▋                       | 25/60 [00:52<01:05,  1.87s/it, acc=28, epoch=30, loss=2.44][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:53<00:18,  1.87s/it, acc=28.1, epoch=30, loss=2.45][A
training routine:  62%|███████████████████████████████████████▋                        | 31/50 [00:53<00:31,  1.68s/it][A
split=train:   0%|                                       | 0/60 [00:53<01:52,  1.87s/it, acc=29.1, epoch=31, loss=2.43][A


Epoch [32/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:54<01:05,  1.87s/it, acc=28.7, epoch=31, loss=2.45][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:54<00:18,  1.87s/it, acc=28.6, epoch=31, loss=2.44][A
training routine:  64%|████████████████████████████████████████▉                       | 32/50 [00:55<00:29,  1.67s/it][A
split=train:   0%|                                       | 0/60 [00:55<01:52,  1.87s/it, acc=31.3, epoch=32, loss=2.42][A


Epoch [33/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:55<01:05,  1.87s/it, acc=28.7, epoch=32, loss=2.44][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:56<00:18,  1.87s/it, acc=28.4, epoch=32, loss=2.45][A
training routine:  66%|██████████████████████████████████████████▏                     | 33/50 [00:56<00:28,  1.68s/it][A
split=train:   0%|                                         | 0/60 [00:56<01:52,  1.87s/it, acc=27, epoch=33, loss=2.43][A


Epoch [34/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:57<01:05,  1.87s/it, acc=28.2, epoch=33, loss=2.44][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:58<00:18,  1.87s/it, acc=28.6, epoch=33, loss=2.43][A
training routine:  68%|███████████████████████████████████████████▌                    | 34/50 [00:58<00:27,  1.70s/it][A
split=train:   0%|                                        | 0/60 [00:58<01:52,  1.87s/it, acc=28.8, epoch=34, loss=2.4][A


Epoch [35/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [00:59<01:05,  1.87s/it, acc=28.3, epoch=34, loss=2.45][A
split=train:  83%|███████████████████████████████▋      | 50/60 [00:59<00:18,  1.87s/it, acc=28.5, epoch=34, loss=2.44][A
training routine:  70%|████████████████████████████████████████████▊                   | 35/50 [01:00<00:25,  1.70s/it][A
split=train:   0%|                                       | 0/60 [01:00<01:52,  1.87s/it, acc=26.2, epoch=35, loss=2.49][A


Epoch [36/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:00<01:05,  1.87s/it, acc=28.3, epoch=35, loss=2.44][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:01<00:18,  1.87s/it, acc=28.7, epoch=35, loss=2.43][A
training routine:  72%|██████████████████████████████████████████████                  | 36/50 [01:01<00:23,  1.69s/it][A
split=train:   0%|                                         | 0/60 [01:01<01:52,  1.87s/it, acc=27, epoch=36, loss=2.42][A


Epoch [37/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:02<01:05,  1.87s/it, acc=28.4, epoch=36, loss=2.44][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:03<00:18,  1.87s/it, acc=28.6, epoch=36, loss=2.43][A
training routine:  74%|███████████████████████████████████████████████▎                | 37/50 [01:03<00:21,  1.67s/it][A
split=train:   0%|                                       | 0/60 [01:03<01:52,  1.87s/it, acc=27.7, epoch=37, loss=2.46][A


Epoch [38/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:04<01:05,  1.87s/it, acc=28.3, epoch=37, loss=2.43][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:04<00:18,  1.87s/it, acc=28.6, epoch=37, loss=2.43][A
training routine:  76%|████████████████████████████████████████████████▋               | 38/50 [01:05<00:19,  1.67s/it][A
split=train:   0%|                                       | 0/60 [01:05<01:52,  1.87s/it, acc=26.8, epoch=38, loss=2.49][A


Epoch [39/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:05<01:05,  1.87s/it, acc=28.4, epoch=38, loss=2.43][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:06<00:18,  1.87s/it, acc=28.7, epoch=38, loss=2.43][A
training routine:  78%|█████████████████████████████████████████████████▉              | 39/50 [01:06<00:18,  1.66s/it][A
split=train:   0%|                                        | 0/60 [01:06<01:52,  1.87s/it, acc=31.1, epoch=39, loss=2.4][A


Epoch [40/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:07<01:05,  1.87s/it, acc=29.6, epoch=39, loss=2.41][A
split=train:  83%|█████████████████████████████████▎      | 50/60 [01:08<00:18,  1.87s/it, acc=29, epoch=39, loss=2.42][A
training routine:  80%|███████████████████████████████████████████████████▏            | 40/50 [01:08<00:16,  1.69s/it][A
split=train:   0%|                                       | 0/60 [01:08<01:52,  1.87s/it, acc=28.4, epoch=40, loss=2.47][A


Epoch [41/50]
--------------------------------------------------



split=train:  42%|████████████████▋                       | 25/60 [01:09<01:05,  1.87s/it, acc=29, epoch=40, loss=2.43][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:09<00:18,  1.87s/it, acc=28.9, epoch=40, loss=2.43][A
training routine:  82%|████████████████████████████████████████████████████▍           | 41/50 [01:10<00:15,  1.70s/it][A
split=train:   0%|                                         | 0/60 [01:10<01:52,  1.87s/it, acc=28, epoch=41, loss=2.45][A


Epoch [42/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:11<01:05,  1.87s/it, acc=28.5, epoch=41, loss=2.42][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:11<00:18,  1.87s/it, acc=28.6, epoch=41, loss=2.42][A
training routine:  84%|█████████████████████████████████████████████████████▊          | 42/50 [01:12<00:13,  1.69s/it][A
split=train:   0%|                                       | 0/60 [01:12<01:52,  1.87s/it, acc=27.4, epoch=42, loss=2.46][A


Epoch [43/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:12<01:05,  1.87s/it, acc=28.5, epoch=42, loss=2.43][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:13<00:18,  1.87s/it, acc=28.7, epoch=42, loss=2.42][A
training routine:  86%|███████████████████████████████████████████████████████         | 43/50 [01:13<00:11,  1.68s/it][A
split=train:   0%|                                       | 0/60 [01:13<01:52,  1.87s/it, acc=28.4, epoch=43, loss=2.43][A


Epoch [44/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:14<01:05,  1.87s/it, acc=29.1, epoch=43, loss=2.42][A
split=train:  83%|█████████████████████████████████▎      | 50/60 [01:14<00:18,  1.87s/it, acc=29, epoch=43, loss=2.42][A
training routine:  88%|████████████████████████████████████████████████████████▎       | 44/50 [01:15<00:10,  1.69s/it][A
split=train:   0%|                                       | 0/60 [01:15<01:52,  1.87s/it, acc=29.3, epoch=44, loss=2.41][A


Epoch [45/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:16<01:05,  1.87s/it, acc=28.8, epoch=44, loss=2.42][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:16<00:18,  1.87s/it, acc=28.8, epoch=44, loss=2.42][A
training routine:  90%|█████████████████████████████████████████████████████████▌      | 45/50 [01:17<00:08,  1.70s/it][A
split=train:   0%|                                       | 0/60 [01:17<01:52,  1.87s/it, acc=28.5, epoch=45, loss=2.43][A


Epoch [46/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:17<01:05,  1.87s/it, acc=28.7, epoch=45, loss=2.42][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:18<00:18,  1.87s/it, acc=28.7, epoch=45, loss=2.42][A
training routine:  92%|██████████████████████████████████████████████████████████▉     | 46/50 [01:18<00:06,  1.71s/it][A
split=train:   0%|                                       | 0/60 [01:18<01:52,  1.87s/it, acc=27.7, epoch=46, loss=2.43][A


Epoch [47/50]
--------------------------------------------------



split=train:  42%|███████████████▊                      | 25/60 [01:19<01:05,  1.87s/it, acc=28.7, epoch=46, loss=2.42][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:20<00:18,  1.87s/it, acc=28.7, epoch=46, loss=2.42][A
training routine:  94%|████████████████████████████████████████████████████████████▏   | 47/50 [01:20<00:05,  1.71s/it][A
split=train:   0%|                                       | 0/60 [01:20<01:52,  1.87s/it, acc=27.8, epoch=47, loss=2.48][A


Epoch [48/50]
--------------------------------------------------



split=train:  42%|████████████████▋                       | 25/60 [01:21<01:05,  1.87s/it, acc=29, epoch=47, loss=2.41][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:21<00:18,  1.87s/it, acc=28.8, epoch=47, loss=2.42][A
training routine:  96%|█████████████████████████████████████████████████████████████▍  | 48/50 [01:22<00:03,  1.72s/it][A
split=train:   0%|                                        | 0/60 [01:22<01:52,  1.87s/it, acc=28.6, epoch=48, loss=2.4][A


Epoch [49/50]
--------------------------------------------------



split=train:  42%|████████████████▋                       | 25/60 [01:22<01:05,  1.87s/it, acc=29, epoch=48, loss=2.41][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:23<00:18,  1.87s/it, acc=28.8, epoch=48, loss=2.42][A
training routine:  98%|██████████████████████████████████████████████████████████████▋ | 49/50 [01:24<00:01,  1.71s/it][A
split=train:   0%|                                       | 0/60 [01:24<01:52,  1.87s/it, acc=29.8, epoch=49, loss=2.38][A


Epoch [50/50]
--------------------------------------------------



split=train:  42%|████████████████▋                       | 25/60 [01:24<01:05,  1.87s/it, acc=29, epoch=49, loss=2.41][A
split=train:  83%|███████████████████████████████▋      | 50/60 [01:25<00:18,  1.87s/it, acc=28.9, epoch=49, loss=2.41][A
training routine: 100%|████████████████████████████████████████████████████████████████| 50/50 [01:25<00:00,  1.70s/it][A

In [4]:
model = model.cpu()
for index in [14]:
    nationality = vectorizer.nationality_vocab.lookup_index(index)
    print("Sampled for {}: ".format(nationality))
    sampled_indices = sample_from_model(model, vectorizer,
                                        nationalities=[index] * 5,
                                        temperature=0.7)
    for sampled_surname in decode_samples(sampled_indices, vectorizer):
        print("-  " + sampled_surname)

Sampled for Russian: 
-  Leghakov
-  Faz
-  Baner
-  Ahramev
-  Atdudamich
