In [15]:
import numpy as np
import torch.nn as nn
import torch
from torch.nn import Module
import data_loader
import torch.optim as optim
from tqdm import tqdm

# DATA PREP

In [17]:
# Load the three German-GSD CoNLL-U splits. Besides the dataloaders,
# data_loader.load also hands back the vocabulary, the tag set and the matrix
# of pretrained word vectors.
UD_DIR = "UD_German-GSD-master"
train_dataloader, dev_dataloader, test_dataloader, vocabulary, tagset, pretrained_embeddings = data_loader.load(
    f"{UD_DIR}/de_gsd-ud-train.conllu",
    f"{UD_DIR}/de_gsd-ud-dev.conllu",
    f"{UD_DIR}/de_gsd-ud-test.conllu",
)

# Every vocabulary index must have a row in the pretrained matrix, otherwise
# the lookups done during training/evaluation would fail much later.
assert pretrained_embeddings.shape[0] >= len(vocabulary), "pretrained matrix has fewer rows than the vocabulary"

# Wrap the pretrained vectors in a frozen embedding layer. from_pretrained takes
# num_embeddings / embedding_dim from the matrix itself (no throw-away random
# initialisation, no hard-coded 300) and freeze=True sets requires_grad=False.
embedding = torch.nn.Embedding.from_pretrained(pretrained_embeddings, freeze=True)

.vector_cache\wiki.de.vec: 5.97GB [07:07, 13.9MB/s]                                                                    
  0%|                                                                                      | 0/2275233 [00:00<?, ?it/s]Skipping token b'2275233' with 1-dimensional vector [b'300']; likely a header
100%|██████████████████████████████████████████████████████████████████████| 2275233/2275233 [04:02<00:00, 9397.50it/s]


# SAMPLE DATA EXPLORATION

In [18]:
# Peek at the first training example; as the output below shows, it is a dict
# holding the word indices under 'sentence' and the tag indices under 'tags'.
sample = train_dataloader.dataset[0]
sample

{'sentence': tensor([   58,   282,   443,     3,  2300, 20144,     4,   806,     3,    61,
           948,    45,   171,  4091,    57,     2]),
 'tags': tensor([7, 6, 0, 1, 6, 0, 2, 0, 1, 7, 5, 8, 8, 0, 3, 1])}

In [19]:
# Map the word indices of the sample sentence back to their vocabulary tokens.
vocabulary.lookup_tokens(sample["sentence"].tolist())

['sehr',
 'gute',
 'beratung',
 ',',
 'schnelle',
 'behebung',
 'der',
 'probleme',
 ',',
 'so',
 'stelle',
 'ich',
 'mir',
 'kundenservice',
 'vor',
 '</s>']

In [21]:
# Map the tag indices of the sample sentence back to their POS tag names.
tagset.lookup_tokens(sample["tags"].tolist())

['ADV',
 'ADJ',
 'NOUN',
 'PUNCT',
 'ADJ',
 'NOUN',
 'DET',
 'NOUN',
 'PUNCT',
 'ADV',
 'VERB',
 'PRON',
 'PRON',
 'NOUN',
 'ADP',
 'PUNCT']

# MODEL CLASS

In [23]:
class NNTagger(nn.Module):
    """
    POS tagger for German sentences.

    Architecture: a single-layer LSTM (``layer1``) that reads the word
    embeddings of a sentence, followed by a linear projection (``layer2``)
    that turns every LSTM output step into one score per POS tag.
    """

    def __init__(self,
                 input_size: int,
                 number_tags: int,
                 hidden_size: int):
        """
        Create the LSTM and the output projection.

        :param input_size: width of one input vector; must equal the embedding length
        :param number_tags: size of the POS tag set, i.e. the width of the output
        :param hidden_size: number of hidden units of the LSTM
        """
        super().__init__()

        # The attribute names end up in the state_dict keys, so they have to
        # stay as they are for saved parameter files to keep loading.
        self.layer1 = nn.LSTM(input_size, hidden_size, num_layers=1, batch_first=True)
        self.layer2 = nn.Linear(hidden_size, number_tags)

    def forward(self,
                input_sent):
        """
        Run a batch of embedded sentences through the LSTM and the linear layer.

        :param input_sent: batch of embedded sentences; shape: (batch_size, sentence_length, embedding_length)
        :return: unnormalised tag scores; shape: (batch_size, sentence_length, number_tags)
        """
        # the LSTM returns (outputs, (h_n, c_n)); only the per-step outputs are needed
        lstm_steps = self.layer1(input_sent)[0]

        # project every step onto the tag set
        return self.layer2(lstm_steps)


# MODEL INITIALIZATION

In [24]:
# Dimensions of the tagger. The LSTM input width has to match the width of the
# word embeddings; the output width is one score per tag in the tag set.
input_size = 300
hidden_size = 300
num_tags = len(tagset)

# fresh, untrained model instance
tagger = NNTagger(hidden_size=hidden_size, number_tags=num_tags, input_size=input_size)

# HYPERPARAMETERS

In [25]:
### Training Parameters ###

# number of full passes over the training data
epochs = 10

# step size handed to the optimizer
learning_rate = 0.01

# optimizer class (not an instance); train_model instantiates it with the model parameters
optimizer = torch.optim.Adam

# functional cross-entropy, called as loss_function(input=..., target=...)
loss_function = torch.nn.functional.cross_entropy

# TRAINING FUNCTION

In [29]:
def train_model(model: Module, 
                train_data, dev_data,
                num_epochs: int,
                optimizer_type,
                loss_function,
                learning_rate: float,
                embedding) -> None:
    """
    Does one complete training run for N epochs and reports the accuracy on the
    training and development data after every epoch.

    :param model: a pytorch model
    :param train_data: a dataloader for getting the training instances
    :param dev_data: a dataloader for getting the development instances (evaluation only)
    :param num_epochs: the number of epochs
    :param optimizer_type: the type of optimizer (a class, instantiated here)
    :param loss_function: the type of loss function
    :param learning_rate: the learning rate for the optimizer
    :param embedding: embedding object with size (vocab_length, 300)
    :return:
    """

    print('--------- Start Training ------------')

    optimizer = optimizer_type(params=model.parameters(), lr=learning_rate)

    # The embeddings are only used as a fixed lookup table; detach() keeps the
    # lookup out of the autograd graph (same effect as reading .weight.data).
    lookup_table = embedding.weight.detach()

    # runs training for specified number of epochs
    for epoch in tqdm(range(num_epochs), desc='POS Tagger Training\n'):

        print(f'---------- Started Epoch {epoch+1} -----------')

        # evaluate() at the end of the previous epoch switched the model to
        # evaluation mode, so training mode has to be re-enabled every epoch.
        model.train()

        for sent in train_data:

            # NOTE(review): only the first (sentence, tags) pair of each batch
            # is used - assumes the dataloader yields batches of size 1; confirm
            # against data_loader.
            token_ids = torch.as_tensor(sent[0][0], dtype=torch.long)
            targets = sent[0][1]

            # an empty sentence carries no training signal
            if token_ids.numel() == 0:
                continue

            # one vectorised lookup turns the word indices into embeddings and
            # unsqueeze adds the batch dimension: (batch_size=1, sent_length, embedding_length)
            input_sent = lookup_table[token_ids].unsqueeze(0)

            # computes model predictions with current model parameters
            model_output = model(input_sent)

            # computes Loss for current sent
            loss = loss_function(input=model_output[0], target=targets)

            optimizer.zero_grad()

            loss.backward()

            # updates parameters
            optimizer.step()

        # evaluate the model that is being trained (not a notebook-level
        # global); evaluate() prints the accuracy of both data sets
        evaluate(model, [train_data, dev_data])


# EVALUATION FUNCTION

In [27]:
def evaluate(model, eval_set, embedding_layer=None):
    """
    Computes the token-level tagging accuracy of a model on one or more data sets.

    :param model: the model to evaluate; called with a tensor of shape
        (1, sent_length, embedding_length) and expected to return scores of
        shape (1, sent_length, num_tags)
    :param eval_set: a list of data sets (e.g. dataloaders); every item yielded
        by a data set is indexed as item[0][0] (word indices) and item[0][1]
        (target tag indices)
    :param embedding_layer: embedding object used to look up the word vectors;
        defaults to the notebook-level ``embedding`` when omitted
    :return: a list with one accuracy (in percent, 0-dim tensor) per data set,
        in the order of ``eval_set``; an empty data set yields 0.0
    """
    if embedding_layer is None:
        # backward-compatible default: the embedding defined at notebook level
        embedding_layer = embedding
    lookup_table = embedding_layer.weight.detach()

    # brings the given model (not a global) in evaluation mode and remembers
    # the previous mode so that it can be restored afterwards
    was_training = model.training
    model.eval()

    final_accs = []
    try:
        with torch.no_grad():
            for ds, dataset in enumerate(eval_set):
                correct_preds = 0
                num_tokens = 0

                for sent in dataset:
                    token_ids = torch.as_tensor(sent[0][0], dtype=torch.long)
                    # target vec (sent_length,)
                    target_vec = torch.as_tensor(sent[0][1])

                    # nothing to score in an empty sentence
                    if token_ids.numel() == 0:
                        continue

                    # (1, sent_length, embedding_length)
                    input_sent = lookup_table[token_ids].unsqueeze(0)

                    # model output of size (1, sent_length, num_tags), logit vals
                    logits = model(input_sent)

                    # argmax directly on the logits: squashing them through a
                    # sigmoid first saturates large values to exactly 1.0 and
                    # creates ties that change the predicted tag
                    sent_pred = torch.argmax(logits[0], dim=-1)

                    num_tokens += target_vec.numel()
                    correct_preds += int((sent_pred == target_vec).sum())

                # guard against an empty data set instead of dividing by zero
                accuracy = torch.tensor(100.0 * correct_preds / num_tokens if num_tokens else 0.0)
                final_accs.append(accuracy)

                print(f"ACCURACY: {torch.round(accuracy)}% in {ds} ds")
    finally:
        # leave the model in the mode the caller had it in
        model.train(was_training)

    return final_accs

# MODEL TRAINING

In [None]:
# Kick off the full training run with the model, data and hyperparameters defined above.
train_model(
    model=tagger,
    train_data=train_dataloader,
    dev_data=dev_dataloader,
    num_epochs=epochs,
    optimizer_type=optimizer,
    loss_function=loss_function,
    learning_rate=learning_rate,
    embedding=embedding,
)

POS Tagger Training
:   0%|                                                                     | 0/10 [00:00<?, ?it/s]

--------- Start Training ------------
---------- Started Epoch 1 -----------


# EVALUATION ON TEST DATA

In [None]:
# Accuracy of the tagger on the held-out test split (a one-element list of data sets).
evaluate(model=tagger, eval_set=[test_dataloader])

# SAVE AND LOAD MODEL PARAMS

In [37]:
# Persist only the learned parameters (the state_dict) of the tagger in a .pt file.
torch.save(obj=tagger.state_dict(), f="./model_parameters_tagger10e300FM.pt")

In [26]:
# Restore a trained tagger: the parameter file only contains weights, so the
# architecture has to be rebuilt with the same sizes first.
tagger = NNTagger(hidden_size=hidden_size, number_tags=num_tags, input_size=input_size)

# NOTE(review): torch.load unpickles the file - only load parameter files from a trusted source.
tagger.load_state_dict(torch.load(f="./model_parameters_tagger10e300FM.pt"))

<All keys matched successfully>