# MetaCAT - Training biLSTM

In [1]:
import numpy as np
from pathlib import Path
from medcat.meta_cat import MetaCAT
from medcat.config_meta_cat import ConfigMetaCAT
from medcat.tokenizers.meta_cat_tokenizers import TokenizerWrapperBPE

In [2]:
# Locations of inputs and outputs, all resolved against the parent of the
# current working directory.
project_root = Path.cwd().parents[0]

data_dir = project_root.joinpath('data')
annotation_file = data_dir.joinpath('emc-dcc_ann.json')  # annotation JSON handed to training

model_dir = project_root.joinpath('models', 'bilstm')  # tokenizer is read from here; trained model is saved here
embeddings_file = model_dir.joinpath('embeddings.npy')

# MetaCAT configuration: create a fresh config object and override the two
# settings this experiment changes.
config_metacat = ConfigMetaCAT()
config_metacat.train['nepochs'] = 10
config_metacat.general['category_name'] = 'Negation'

## Load tokenizer and embedding matrix

In [3]:
# Embedding matrix stored as a NumPy array file inside model_dir
embeddings = np.load(embeddings_file)
# Tokenizer previously saved in the same model directory
tokenizer = TokenizerWrapperBPE.load(model_dir)

## Train biLSTM

In [4]:
# Construct the MetaCAT instance from the loaded tokenizer, the embedding
# matrix and the configuration prepared above in this notebook.
mc_negation = MetaCAT(
    config=config_metacat,
    embeddings=embeddings,
    tokenizer=tokenizer,
)

In [5]:
# Train the model on the annotation file; the trained model is saved into model_dir.
# Paths are passed as plain strings, the type MetaCAT.train is annotated with.
# NOTE(review): some MedCAT releases appear to special-case `str` for json_path
# (wrapping it in a list, iterating otherwise), so a pathlib.Path may not be
# handled the same way — confirm against the installed version.
results = mc_negation.train(json_path=str(annotation_file), save_dir_path=str(model_dir))

Epoch: 0 **************************************************  Train
              precision    recall  f1-score   support

           0       0.85      0.68      0.76      1569
           1       0.95      0.98      0.96      9727

    accuracy                           0.94     11296
   macro avg       0.90      0.83      0.86     11296
weighted avg       0.94      0.94      0.94     11296

Epoch: 0 **************************************************  Test
              precision    recall  f1-score   support

           0       0.92      0.88      0.90       191
           1       0.98      0.99      0.98      1064

    accuracy                           0.97      1255
   macro avg       0.95      0.93      0.94      1255
weighted avg       0.97      0.97      0.97      1255


##### Model saved to /Users/stan3/Data/negation-detection/models/bilstm/model.dat at epoch: 0 and f1: 0.9702243284980442 #####

Epoch: 1 **************************************************  Train
              pre