In [1]:
from pathlib import Path
import re
import torch
from transformers import TrainingArguments


def read_lsconll(file_path):
    file_path = Path(file_path)

    raw_text = file_path.read_text().strip()
    
    raw_docs = re.split(r'\n\t?\n', raw_text)
    token_docs = []
    tag_docs = []
    for doc in raw_docs:
                        
        tokens = []
        tags = []
        for line in doc.split('\n'):
            try:
                token, _, _, tag = line.split(' ')
            except ValueError:
                token, _, tag = line.split(' ')
                
            tokens.append(token)
            tags.append(tag)
        token_docs.append(tokens)
        tag_docs.append(tags)

    return token_docs, tag_docs

def read_conll(file_path):
    file_path = Path(file_path)

    raw_text = file_path.read_text().strip()
    
    raw_docs = re.split(r'\n\t?\n', raw_text)
    token_docs = []
    tag_docs = []
    for doc in raw_docs:
                
        tokens = []
        tags = []
        for line in doc.split('\n'):
            try:
                token, tag = line.split('\t')
            except ValueError:
                token, tag = line.split(' ')
                
            tokens.append(token)
            tags.append(tag)
        token_docs.append(tokens)
        tag_docs.append(tags)

    return token_docs, tag_docs


# class TrainingArgumentsWithMPSSupport(TrainingArguments):

#     @property
#     def device(self) -> torch.device:
#         if torch.cuda.is_available():
#             return torch.device("cuda")
#         elif torch.backends.mps.is_available():
#             return torch.device("mps")
#         else:
#             return torch.device("cpu")


torch.backends.mps.is_available()

True

In [24]:
from simpletransformers.ner import NERModel, NERArgs
from itertools import product

model_args = NERArgs()

labels_prefixes = ["B-", "I-"]
labels_suffixes = ["MethodName", "HyperparameterName", "HyperparameterValue",
                          "MetricName", "MetricValue", "TaskName", "DatasetName"]

labels_list = [ ''.join(x) for x in product(labels_prefixes, labels_suffixes)] + ["O"]

model_args.labels_list = labels_list

model_args.train_batch_size = 32
model_args.evaluate_during_training = True
model_args.num_train_epochs = 10
model_args.learning_rate = 4e-5

model = NERModel(
    "bert", "allenai/scibert_scivocab_uncased", use_cuda=False, args=model_args
)

Some weights of the model checkpoint at allenai/scibert_scivocab_uncased were not used when initializing BertForTokenClassification: ['cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initi

In [25]:
import pandas as pd
import os

train_data_path = "../data/final_dataset/manual_formatted/train/"
eval_data_path = "../data/final_dataset/manual_formatted/test/"


def get_df_from_conll(path):
    
    
    if os.path.isdir(path):
        filenames = os.listdir(path)
        filepaths = [os.path.join(path, filename) for filename in filenames]
    else:
        filepaths = [path]
    
    df = []
    sentence_num = 0
    
    for filepath in filepaths:
        data = read_conll(filepath)
    
        for sample_idx in range(len(data[0])):
            for token_idx in range(len(data[0][sample_idx])):
                df.append([sentence_num, data[0][sample_idx][token_idx], data[1][sample_idx][token_idx]])
            
            sentence_num += 1
    
    df = pd.DataFrame(df, columns=["sentence_id", "words", "labels"])
    return df

train_data = get_df_from_conll(train_data_path)
eval_data = get_df_from_conll(eval_data_path)

In [26]:
model.train_model(train_data, eval_data=eval_data)

  return [


  0%|          | 0/2 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Running Epoch 1 of 10:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/17 [00:00<?, ?it/s]

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

(170,
 defaultdict(list,
             {'global_step': [17, 34, 51, 68, 85, 102, 119, 136, 153, 170],
              'train_loss': [0.6752484440803528,
               0.23382562398910522,
               0.17894312739372253,
               0.2476365566253662,
               0.1649823635816574,
               0.038405559957027435,
               0.06731068342924118,
               0.03487905114889145,
               0.015774184837937355,
               0.030581034719944],
              'eval_loss': [0.4160388559103012,
               0.35463009402155876,
               0.2898179665207863,
               0.28644922748208046,
               0.29755645245313644,
               0.30797505378723145,
               0.29764577373862267,
               0.31123168021440506,
               0.3266013003885746,
               0.3302871510386467],
              'precision': [0.0,
               0.0,
               0.22727272727272727,
               0.26732673267326734,
               0.261904761904761

In [27]:
result, model_outputs, preds_list = model.eval_model(eval_data)

  return [


  0%|          | 0/1 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

In [29]:
result

{'eval_loss': 0.3302871510386467,
 'precision': 0.3076923076923077,
 'recall': 0.3448275862068966,
 'f1_score': 0.3252032520325204}

In [28]:
preds_list

[['O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'B-HyperparameterValue',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'B-HyperparameterValue',
  'O',
  'O',
  'B-HyperparameterName',
  'I-HyperparameterName',
  'O',
  'O',
  'O',
  'B-HyperparameterValue',
  'O',
  'O',
  'B-HyperparameterName',
  'O',
  'O',
  'B-HyperparameterValue',
  'O',
  'B-HyperparameterValue',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'B-HyperparameterValue',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'B-HyperparameterName',
  'O',
  'B-HyperparameterName',
  'O',
  'B-HyperparameterValue',
  'O',
  'O',
  'B-HyperparameterName',
  'I-Hyperp