In [1]:
from pathlib import Path
import re
import torch
from transformers import TrainingArguments


def read_lsconll(file_path, encoding=None):
    """Parse a space-separated, 3- or 4-column CoNLL-style file.

    Documents are separated by one or more blank lines (a separator line may
    contain only spaces/tabs). Every other line must consist of three or four
    single-space-separated fields; the first field is taken as the token and
    the last field as the tag, the middle fields are ignored.

    Args:
        file_path: Path (``str`` or ``pathlib.Path``) of the file to read.
        encoding: Text encoding passed to ``Path.read_text``. ``None`` (the
            default) keeps the platform default, as before.

    Returns:
        A ``(token_docs, tag_docs)`` tuple of parallel lists: one list of
        tokens and one list of tags per document. Both are empty when the
        file is empty or contains only whitespace.

    Raises:
        ValueError: If a non-blank line does not have three or four fields.
    """
    file_path = Path(file_path)

    raw_text = file_path.read_text(encoding=encoding).strip()

    token_docs = []
    tag_docs = []

    # An empty file has no documents; without this guard the single empty
    # "line" would fail to unpack below.
    if not raw_text:
        return token_docs, tag_docs

    # Treat any run of blank / whitespace-only lines as ONE separator so that
    # extra blank lines do not leak an empty line into the next document.
    raw_docs = re.split(r'\n(?:[ \t]*\n)+', raw_text)

    for doc in raw_docs:
        tokens = []
        tags = []
        for line in doc.split('\n'):
            if not line.strip():
                continue

            fields = line.split(' ')
            if len(fields) not in (3, 4):
                raise ValueError(
                    f"{file_path}: expected 3 or 4 space-separated fields, "
                    f"got {len(fields)} in line {line!r}"
                )

            tokens.append(fields[0])
            tags.append(fields[-1])

        if tokens:
            token_docs.append(tokens)
            tag_docs.append(tags)

    return token_docs, tag_docs

def read_conll(file_path, encoding=None):
    """Parse a two-column ``token<sep>tag`` CoNLL-style file.

    Documents are separated by one or more blank lines (a separator line may
    contain only spaces/tabs). Every other line must hold exactly two fields,
    separated by a tab or, failing that, by a single space.

    Args:
        file_path: Path (``str`` or ``pathlib.Path``) of the file to read.
        encoding: Text encoding passed to ``Path.read_text``. ``None`` (the
            default) keeps the platform default, as before.

    Returns:
        A ``(token_docs, tag_docs)`` tuple of parallel lists: one list of
        tokens and one list of tags per document. Both are empty when the
        file is empty or contains only whitespace.

    Raises:
        ValueError: If a non-blank line cannot be split into exactly two
            fields by either separator.
    """
    file_path = Path(file_path)

    raw_text = file_path.read_text(encoding=encoding).strip()

    token_docs = []
    tag_docs = []

    # An empty file has no documents; without this guard the single empty
    # "line" would fail to unpack below.
    if not raw_text:
        return token_docs, tag_docs

    # Treat any run of blank / whitespace-only lines as ONE separator so that
    # extra blank lines do not leak an empty line into the next document.
    raw_docs = re.split(r'\n(?:[ \t]*\n)+', raw_text)

    for doc in raw_docs:
        tokens = []
        tags = []
        for line in doc.split('\n'):
            if not line.strip():
                continue

            # Prefer tab-separated columns, fall back to space-separated.
            fields = line.split('\t')
            if len(fields) != 2:
                fields = line.split(' ')
            if len(fields) != 2:
                raise ValueError(
                    f"{file_path}: expected 'token<TAB>tag' or 'token tag', "
                    f"got line {line!r}"
                )

            token, tag = fields
            tokens.append(token)
            tags.append(tag)

        if tokens:
            token_docs.append(tokens)
            tag_docs.append(tags)

    return token_docs, tag_docs

In [2]:
from simpletransformers.ner import NERModel, NERArgs
from itertools import product

model_args = NERArgs()

# Build the BIO label set: a "B-" and an "I-" variant of every entity type,
# plus the outside tag "O" (15 labels in total).
labels_prefixes = ["B-", "I-"]
labels_suffixes = ["MethodName", "HyperparameterName", "HyperparameterValue",
                          "MetricName", "MetricValue", "TaskName", "DatasetName"]

labels_list = [ ''.join(x) for x in product(labels_prefixes, labels_suffixes)] + ["O"]

model_args.labels_list = labels_list

# Training hyper-parameters.
model_args.train_batch_size = 32
model_args.evaluate_during_training = True
model_args.num_train_epochs = 20
model_args.learning_rate = 1e-4
# NOTE(review): per the simpletransformers docs this flag is only consulted
# when `use_early_stopping` is True (default False), so as written no early
# stopping happens -- confirm whether early stopping was intended.
model_args.early_stopping_consider_epochs = True

# Fix the random seed so that Restart & Run All reproduces the same metrics.
model_args.manual_seed = 42
# Allow re-running the notebook: without this, training refuses to start once
# the output directory from a previous run exists and is non-empty.
model_args.overwrite_output_dir = True

model = NERModel(
    "bert", "allenai/scibert_scivocab_uncased", use_cuda=False, args=model_args
)

# The model is created with CUDA disabled; when Apple's MPS backend is
# available, point the wrapper at it instead.
# NOTE(review): presumably NERModel moves the network to `model.device` when
# training/evaluation starts -- confirm against the installed version.
if torch.backends.mps.is_available():
    model.device = 'mps'

Some weights of the model checkpoint at allenai/scibert_scivocab_uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

In [3]:
import pandas as pd
import os

# Directories holding the manually formatted CoNLL files for each split
# (relative to the notebook's working directory).
train_data_path, eval_data_path = (
    "../data/final_dataset/manual_formatted/train/",
    "../data/final_dataset/manual_formatted/test/",
)


def get_df_from_conll(path, reader=None):
    """Load CoNLL file(s) into a one-row-per-token DataFrame.

    Args:
        path: A single file, or a directory whose regular, non-hidden files
            are all read. Directory entries are processed in sorted name
            order so that sentence ids are reproducible across runs and
            machines (``os.listdir`` order is arbitrary).
        reader: Callable taking a file path and returning a
            ``(token_docs, tag_docs)`` pair of parallel lists of lists.
            Defaults to ``read_conll``.

    Returns:
        ``pandas.DataFrame`` with columns ``sentence_id``, ``words`` and
        ``labels``. ``sentence_id`` is a running counter over all documents
        of all files, starting at 0.
    """
    if reader is None:
        reader = read_conll

    if os.path.isdir(path):
        candidates = (
            os.path.join(path, filename)
            for filename in sorted(os.listdir(path))
            # Skip hidden entries such as .DS_Store / .ipynb_checkpoints.
            if not filename.startswith('.')
        )
        # Sub-directories cannot be parsed as data files.
        filepaths = [candidate for candidate in candidates if os.path.isfile(candidate)]
    else:
        filepaths = [path]

    rows = []
    sentence_num = 0

    for filepath in filepaths:
        token_docs, tag_docs = reader(filepath)

        for tokens, tags in zip(token_docs, tag_docs):
            rows.extend([sentence_num, token, tag] for token, tag in zip(tokens, tags))
            sentence_num += 1

    return pd.DataFrame(rows, columns=["sentence_id", "words", "labels"])

# Build the per-token frames for the training and evaluation splits.
train_data, eval_data = map(get_df_from_conll, (train_data_path, eval_data_path))

In [4]:
# Fine-tune the model on train_data; eval_data is evaluated during training
# because evaluate_during_training is enabled in the model args.
# The cell displays the call's return value (the final global step and the
# recorded per-evaluation training details).
# NOTE(review): eval_data is loaded from the "test/" directory, so the test
# split is also what is monitored during training -- confirm this is intended.
model.train_model(train_data, eval_data=eval_data)

  return [


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Running Epoch 0 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Running Epoch 1 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Running Epoch 2 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Running Epoch 3 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 4 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 5 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 6 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 7 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 8 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 9 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 10 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 11 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 12 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 13 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 14 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 15 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 16 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 17 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 18 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 19 of 20:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

(340,
 defaultdict(list,
             {'global_step': [17,
               34,
               51,
               68,
               85,
               102,
               119,
               136,
               153,
               170,
               187,
               204,
               221,
               238,
               255,
               272,
               289,
               306,
               323,
               340],
              'train_loss': [0.12686067819595337,
               0.11911395192146301,
               0.0653909221291542,
               0.09829109907150269,
               0.018254844471812248,
               0.015498025342822075,
               0.010530555620789528,
               0.04564833641052246,
               0.010217799805104733,
               0.019890738651156425,
               0.0036113925743848085,
               0.00016018036694731563,
               0.0007375174318440259,
               0.0008998610428534448,
               0.0067594177089631

In [5]:
# Evaluate the trained model on eval_data and keep the three returned values:
# the metrics (`result`), the raw model outputs, and the predicted labels per
# sentence (`preds_list`).
result, model_outputs, preds_list = model.eval_model(eval_data)

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
# Display the predicted label sequences returned by eval_model.
# NOTE(review): this prints the complete nested list; consider showing only a
# slice (e.g. the first few sentences) to keep the notebook output readable.
preds_list

[['O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'B-HyperparameterName',
  'I-HyperparameterName',
  'I-HyperparameterName',
  'O',
  'B-DatasetName',
  'O',
  'B-HyperparameterValue',
  'O',
  'O',
  'B-HyperparameterName',
  'I-HyperparameterName',
  'O',
  'O',
  'O',
  'B-HyperparameterValue',
  'O',
  'O',
  'B-HyperparameterName',
  'I-HyperparameterName',
  'O',
  'B-HyperparameterValue',
  'I-HyperparameterValue',
  'B-HyperparameterValue',
  'O',
  'O',
  'B-HyperparameterName',
  'I-HyperparameterName',
  'I-HyperparameterName',
  'O',
  'B-HyperparameterValue',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'B-HyperparameterValue',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',


In [8]:
# Display the evaluation metrics (eval_loss, precision, recall, f1_score).
result

{'eval_loss': 0.35971972346305847,
 'precision': 0.36423841059602646,
 'recall': 0.47413793103448276,
 'f1_score': 0.4119850187265917}