# MetaCAT - Training biLSTM

In [1]:
import numpy as np
from tokenizers import ByteLevelBPETokenizer
from pathlib import Path
from medcat.meta_cat import MetaCAT
from medcat.preprocessing.tokenizers import TokenizerWrapperBPE

In [2]:
# --- Inputs -----------------------------------------------------------------
# The 'data' folder is a sibling of the current working directory
# (one level above cwd), and holds the annotation export used for training.
data_dir = Path.cwd().parents[0].joinpath('data')
annotation_file = data_dir.joinpath('emc-dcc_ann.json')

# --- Outputs ----------------------------------------------------------------
# Model artefacts live in '<parent of cwd>/models/bilstm'.
model_dir = Path.cwd().parents[0].joinpath('models', 'bilstm')
embeddings_file = model_dir.joinpath('embeddings.npy')

# The tokenizer name must contain 'bbpe' (ByteLevelBPETokenizer) or 'bert'
# (BertTokenizerFast). It is stored in the model_config dict and subsequently
# written to vars.dat on disk.
tokenizer_name = 'bbpe_dutch-wikipedia'

## Load Tokenizer and embeddings matrix

In [3]:
# Restore the BPE tokenizer wrapper stored under `model_dir` with the
# configured tokenizer name.
tokenizer = TokenizerWrapperBPE.load(model_dir, tokenizer_name)

# Read the embedding matrix from its .npy file.
embeddings = np.load(file=embeddings_file)

## Train biLSTM

In [7]:
# Instantiate the MetaCAT classifier on top of the tokenizer and embedding
# matrix loaded earlier. MetaCAT is already imported in the notebook's import
# cell, so it is not re-imported here.
mc_negation = MetaCAT(tokenizer=tokenizer,
                      embeddings=embeddings,
                      # Index of the last row of the embedding matrix.
                      # NOTE(review): assumes that row is the padding vector - confirm.
                      pad_id=len(embeddings) - 1,
                      save_dir=model_dir,
                      device='cpu')

# Train for 10 epochs on the annotation file, using the 'Negation' label
# (presumably the meta-annotation category name in the export - verify against
# the annotation file). The tokenizer name is passed along in model_config.
# Per-epoch classification reports and losses are printed as cell output.
results = mc_negation.train(annotation_file,
                            'Negation',
                            nepochs=10,
                            model_config={'tokenizer_name': tokenizer_name})

Epoch: 0 **************************************************  Train
              precision    recall  f1-score   support

           0       0.85      0.69      0.76      1591
           1       0.95      0.98      0.97      9758

    accuracy                           0.94     11349
   macro avg       0.90      0.84      0.86     11349
weighted avg       0.94      0.94      0.94     11349

Epoch: 0 **************************************************  Test
              precision    recall  f1-score   support

           0       0.90      0.78      0.84       182
           1       0.96      0.99      0.97      1080

    accuracy                           0.96      1262
   macro avg       0.93      0.88      0.91      1262
weighted avg       0.96      0.96      0.96      1262

Train Loss: 0.19756084363776402
Test Loss:  0.15934789850143716






Model saved at epoch: 0 and f1: 0.955060714508799
[[ 142   40]
 [  15 1065]]



Epoch: 1 **************************************************  Tr

## Save model

In [9]:
# Save model config.
# The training log above reports "Model saved at epoch: ...", so the model
# itself has already been written during training; this call persists the
# remaining configuration.
# NOTE(review): full_save=False presumably skips re-saving the larger artefacts
# (e.g. embeddings) - confirm against MetaCAT.save.
mc_negation.save(full_save=False)