In [None]:
!pip install -U flair

In [1]:
# Mount Google Drive under /content/drive (Colab only); the project folder
# used by the rest of the notebook lives below this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import sys
import os

# Root folder of the project on the mounted Drive; data, log and output
# locations in this notebook are all built from it.
path = "/content/drive/MyDrive/NLP_Project_New"

# Make modules stored in the project folder importable. The membership check
# keeps the cell idempotent: re-running it no longer appends the same entry
# to sys.path again and again.
if os.path.abspath(path) not in sys.path:
    sys.path.append(os.path.abspath(path))

In [3]:
import nlp_project_functions as functions

from flair.data import Sentence
from flair.models import SequenceTagger

import pandas as pd
import math

from datetime import datetime
import os.path

import logging

# Logging setup: every message goes both to a log file inside the project
# folder and to the cell output, in the same format.
logfile = f"{path}/logs/run_model.log"
# FileHandler does not create missing parent folders, so make sure it exists.
os.makedirs(os.path.dirname(logfile), exist_ok=True)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

logger = logging.getLogger("run_model.log")
logger.setLevel(logging.INFO)
# Do not hand records on to the root logger: its own handler would print each
# message a second time (the extra "INFO:run_model.log:..." lines).
logger.propagate = False

# getLogger returns the same object on every call, so re-running this cell
# would otherwise stack another pair of handlers and multiply every message.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

file_handler = logging.FileHandler(logfile)
file_handler.setFormatter(formatter)
file_handler.setLevel(logging.INFO)

stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
stream_handler.setLevel(logging.INFO)

logger.addHandler(file_handler)
logger.addHandler(stream_handler)

In [6]:
# Identify this run: which model is being evaluated and when the run started.
model_name = "dbmdz/flair-historic-ner-onb"
model_checkpoint = model_name

now = datetime.now()
day = now.strftime("%d-%m")
timestamp = now.strftime("%m-%d-%H-%M-%S")

# `attempt` (model name plus start time) is the name of the prediction column
# that gets written to the comparison tables.
attempt = f"{model_name}--{timestamp}"

logger.info(f"Initiated model test. Attempt: {attempt}")

2024-03-26 08:44:50,659 - run_model.log - INFO - Initiated model test. Attempt: dbmdz/flair-historic-ner-onb--03-26-08-44-50
INFO:run_model.log:Initiated model test. Attempt: dbmdz/flair-historic-ner-onb--03-26-08-44-50


In [7]:
def create_checksum(df, n_rows: int = 5):
    """Build a cheap fingerprint of a token table from its first and last rows.

    The fingerprint is the concatenation of the string values found in the
    "TOKEN" column of the first ``n_rows`` and the last ``n_rows`` rows.
    Non-string cells (for example NaN) are skipped. It is not a hash: two
    tables that agree on those rows produce the same fingerprint even if they
    differ in between.

    Args:
        df: DataFrame with a "TOKEN" column.
        n_rows: How many rows to sample from each end (default 5).

    Returns:
        The concatenated tokens as a single string.

    Raises:
        ValueError: If ``n_rows`` is negative.
    """
    if n_rows < 0:
        raise ValueError("n_rows must not be negative")

    tokens = df["TOKEN"]
    # Slicing clamps to the table, so frames shorter than n_rows no longer
    # raise IndexError. On short frames head and tail may overlap, exactly as
    # they did before for frames with 5 to 9 rows.
    head = list(tokens.iloc[:n_rows])
    tail = list(tokens.iloc[max(len(tokens) - n_rows, 0):])

    return "".join(token for token in head + tail if isinstance(token, str))

In [8]:
def list_flair_results(sentence: str, predictions: list) -> tuple:
    """Project span-level predictions onto the whitespace-separated words of a sentence.

    Args:
        sentence: The tagged text; words are separated by single spaces.
        predictions: One dict per predicted span. Each dict carries
            "start_pos" and "end_pos" (character offsets into ``sentence``)
            and a non-empty "labels" list whose first entry holds the tag
            under "value".

    Returns:
        A tuple ``(words, results)``: the words of the sentence and, for each
        word, the tag of the span covering it, or "O" when no span does.
    """
    def overlaps(span: dict, word_start: int, word_end: int) -> bool:
        # True when the span shares at least one character with the word.
        span_start = span.get("start_pos")
        span_end = span.get("end_pos")
        if span_start is None or span_end is None:
            return False
        return span_start < word_end and span_end > word_start

    words = sentence.split(" ")
    results = []

    running_char = 0  # character offset at which the current word starts

    for word in words:
        # A span that starts exactly at this word always wins.
        found_match = next((d for d in predictions if d.get("start_pos") == running_char), None)
        if not found_match:
            # Otherwise accept a span that merely overlaps the word. This
            # covers the second and later words of a multi-word entity and
            # words with attached punctuation, which used to fall back to "O".
            word_end = running_char + len(word)
            found_match = next((d for d in predictions if overlaps(d, running_char, word_end)), None)

        if found_match:
            results.append(found_match.get('labels')[0]["value"])
        else:
            results.append("O")
        running_char += len(word) + 1  # +1 for the separating space

    return words, results

In [None]:
# Load the tagger from the checkpoint configured for this run. The model id
# used to be hard-coded here as "dbmdz/flair-historic-ner-lft" while
# `model_name` / `attempt` recorded "...-onb", so predictions were stored
# under the name of a different model. To evaluate another model, change
# `model_name` where the run is configured.
tagger = SequenceTagger.load(model_checkpoint)

In [17]:
# Run the tagger on each of the seven adversarial perturbation files and store
# its token-level predictions next to the gold labels in a per-file TSV table.
for i in range(1,8):
    test_texts, test_labels = functions.read_conll_data(f'{path}/data/advers_tests/perturbation_{i}.tsv')

    # The tagger receives one space-joined string per sentence;
    # list_flair_results later splits on the same single space.
    all_sentences = [" ".join(text) for text in test_texts]

    all_predictions = []
    total = len(all_sentences)

    for idx, item in enumerate(all_sentences):
        if idx % 500 == 0:
            print(f"On prediction {idx} ({((idx/total)*100):.2f}% done).")
        sentence = Sentence(item)
        tagger.predict(sentence)
        # One dict per predicted span; an empty list when nothing was found.
        all_predictions.append([entity.to_dict() for entity in sentence.get_spans('ner')])

    # Flatten to one entry per token. Each sentence is followed by a separator
    # entry: "" for token and gold label, "_" as placeholder prediction.
    tokens = []
    labels = []
    predictions = []
    for text, prediction, labels_list in zip(all_sentences, all_predictions, test_labels):
        word_list, prediction_list = list_flair_results(text, prediction)
        tokens.extend(word_list)
        tokens.append("")
        labels.extend(labels_list)
        labels.append("")
        predictions.extend(prediction_list)
        predictions.append("_")

    # Strip underscores so the "_" separator placeholder becomes "".
    predictions = [item.replace('_', '') for item in predictions]
    predictions = functions.transform_to_BIO(predictions)

    # Build the table and leave out its last row (the separator that follows
    # the final sentence). Slicing replaces the former in-place drop.
    prediction_comparison = pd.DataFrame(
        {"TOKEN": tokens,
         "NER": labels,
         attempt: predictions
         }).iloc[:-1]

    pred_checksum = create_checksum(prediction_comparison)

    comparison_path = f'{path}/advers_model_comparisons/perturb_{i}_comparison.tsv'

    if os.path.isfile(comparison_path):
        pred_comp_df = pd.read_csv(comparison_path, sep='\t')
        file_checksum = create_checksum(pred_comp_df)
        if file_checksum != pred_checksum:
            logger.error("The checksums for the loaded test data and the predictions table do not match. Maybe the train/test/dev split has changed?")
            raise Exception("The checksums for the loaded test data and the predictions table do not match. Maybe the train/test/dev split has changed?")
        # The checksum only looks at a few rows at each end, so check the row
        # count explicitly instead of failing inside pandas on assignment.
        if len(pred_comp_df) != len(prediction_comparison):
            length_error = (f"The predictions table at {comparison_path} has {len(pred_comp_df)} rows, "
                            f"but {len(prediction_comparison)} predictions were produced.")
            logger.error(length_error)
            raise Exception(length_error)
        # [:-1] mirrors the dropped trailing separator row.
        pred_comp_df[attempt] = predictions[:-1]
        pred_comp_df.to_csv(comparison_path, sep='\t', index=False)
        logger.info("The predictions were added to the predictions table.")
    else:
        prediction_comparison.to_csv(comparison_path, sep='\t', index=False)
        logger.info(f"No predictions table was found at location {comparison_path}, so a new one was created.")


On prediction 0 (0.00% done).
On prediction 500 (25.67% done).
On prediction 1000 (51.33% done).
On prediction 1500 (77.00% done).


2024-03-26 08:55:26,553 - run_model.log - INFO - The predictions were added to the predictions table.
INFO:run_model.log:The predictions were added to the predictions table.


On prediction 0 (0.00% done).
On prediction 500 (25.67% done).
On prediction 1000 (51.33% done).
On prediction 1500 (77.00% done).


2024-03-26 08:56:48,893 - run_model.log - INFO - The predictions were added to the predictions table.
INFO:run_model.log:The predictions were added to the predictions table.


On prediction 0 (0.00% done).
On prediction 500 (25.67% done).
On prediction 1000 (51.33% done).
On prediction 1500 (77.00% done).


2024-03-26 08:58:12,079 - run_model.log - INFO - The predictions were added to the predictions table.
INFO:run_model.log:The predictions were added to the predictions table.


On prediction 0 (0.00% done).
On prediction 500 (25.67% done).
On prediction 1000 (51.33% done).
On prediction 1500 (77.00% done).


2024-03-26 08:59:35,653 - run_model.log - INFO - The predictions were added to the predictions table.
INFO:run_model.log:The predictions were added to the predictions table.


On prediction 0 (0.00% done).
On prediction 500 (25.67% done).
On prediction 1000 (51.33% done).
On prediction 1500 (77.00% done).


2024-03-26 09:00:58,677 - run_model.log - INFO - The predictions were added to the predictions table.
INFO:run_model.log:The predictions were added to the predictions table.


On prediction 0 (0.00% done).
On prediction 500 (25.67% done).
On prediction 1000 (51.33% done).
On prediction 1500 (77.00% done).


2024-03-26 09:02:21,791 - run_model.log - INFO - The predictions were added to the predictions table.
INFO:run_model.log:The predictions were added to the predictions table.


On prediction 0 (0.00% done).
On prediction 500 (25.67% done).
On prediction 1000 (51.33% done).
On prediction 1500 (77.00% done).


2024-03-26 09:03:45,850 - run_model.log - INFO - The predictions were added to the predictions table.
INFO:run_model.log:The predictions were added to the predictions table.


In [15]:
# Sanity check: row count of the comparison table most recently read from disk.
len(pred_comp_df)

54303

In [16]:
# Sanity check: row count of the freshly built prediction table; it has to
# match the stored table for the new column to be appended.
len(prediction_comparison)

54303

In [None]:
# Load the test split. The location is derived from the shared project root
# `path` (as the other data paths are) instead of repeating the absolute path.
test_texts, test_labels = functions.read_conll_data(f'{path}/data/train_test_val/test.tsv')

In [None]:
# One space-joined string per sentence, ready to be wrapped in a Sentence.
all_sentences = [" ".join(text) for text in test_texts]

In [None]:
# Number of sentences that will be sent through the tagger.
len(all_sentences)

5859

In [None]:
# Tag every sentence and keep, per sentence, the list of predicted spans as
# plain dicts (an empty list when the tagger found nothing).
all_predictions = []
total = len(all_sentences)
for idx, item in enumerate(all_sentences):
    if idx % 500 == 0:
        print(f"On prediction {idx} ({((idx/total)*100):.2f}% done).")

    sentence = Sentence(item)
    tagger.predict(sentence)
    all_predictions.append([span.to_dict() for span in sentence.get_spans('ner')])

On prediction 0 (0.00% done).
On prediction 100 (1000.00% done).
On prediction 200 (2000.00% done).
On prediction 300 (3000.00% done).
On prediction 400 (4000.00% done).
On prediction 500 (5000.00% done).
On prediction 600 (6000.00% done).
On prediction 700 (7000.00% done).
On prediction 800 (8000.00% done).
On prediction 900 (9000.00% done).
On prediction 1000 (10000.00% done).
On prediction 1100 (11000.00% done).
On prediction 1200 (12000.00% done).
On prediction 1300 (13000.00% done).
On prediction 1400 (14000.00% done).
On prediction 1500 (15000.00% done).
On prediction 1600 (16000.00% done).
On prediction 1700 (17000.00% done).
On prediction 1800 (18000.00% done).
On prediction 1900 (19000.00% done).
On prediction 2000 (20000.00% done).
On prediction 2100 (21000.00% done).
On prediction 2200 (22000.00% done).
On prediction 2300 (23000.00% done).
On prediction 2400 (24000.00% done).
On prediction 2500 (25000.00% done).
On prediction 2600 (26000.00% done).
On prediction 2700 (27000.

In [None]:
def list_flair_results(sentence: str, predictions: list) -> tuple:
    """Project span-level predictions onto the whitespace-separated words of a sentence.

    Args:
        sentence: The tagged text; words are separated by single spaces.
        predictions: One dict per predicted span. Each dict carries
            "start_pos" and "end_pos" (character offsets into ``sentence``)
            and a non-empty "labels" list whose first entry holds the tag
            under "value".

    Returns:
        A tuple ``(words, results)``: the words of the sentence and, for each
        word, the tag of the span covering it, or "O" when no span does.
    """
    def overlaps(span: dict, word_start: int, word_end: int) -> bool:
        # True when the span shares at least one character with the word.
        span_start = span.get("start_pos")
        span_end = span.get("end_pos")
        if span_start is None or span_end is None:
            return False
        return span_start < word_end and span_end > word_start

    words = sentence.split(" ")
    results = []

    running_char = 0  # character offset at which the current word starts

    for word in words:
        # A span that starts exactly at this word always wins.
        found_match = next((d for d in predictions if d.get("start_pos") == running_char), None)
        if not found_match:
            # Otherwise accept a span that merely overlaps the word. This
            # covers the second and later words of a multi-word entity and
            # words with attached punctuation, which used to fall back to "O".
            word_end = running_char + len(word)
            found_match = next((d for d in predictions if overlaps(d, running_char, word_end)), None)

        if found_match:
            results.append(found_match.get('labels')[0]["value"])
        else:
            results.append("O")
        running_char += len(word) + 1  # +1 for the separating space

    return words, results

In [None]:
# Translation table applied to the raw predictions when "PER" does not occur
# among them: "LABEL_0"/"LABEL_1" become "PER"/"LOC", "O" is kept, and the "_"
# sentence-separator placeholder becomes an empty string.
# NOTE(review): which model emits LABEL_0/LABEL_1 is not visible here — confirm the mapping.
label_transl = {"LABEL_0": "PER", "LABEL_1": "LOC", "O": "O", "_": ""}

In [None]:
# Flatten to one entry per token. Each sentence is followed by a separator
# entry: "" for token and gold label, "_" as placeholder prediction.
tokens = []
labels = []
predictions = []
for text, prediction, labels_list in zip(all_sentences, all_predictions, test_labels):
    word_list, prediction_list = list_flair_results(text, prediction)
    tokens.extend(word_list)
    tokens.append("")
    labels.extend(labels_list)
    labels.append("")
    predictions.extend(prediction_list)
    predictions.append("_")

label_vocab = set(predictions)

if "PER" not in label_vocab:
    # No "PER" among the predictions: translate through label_transl. Tags the
    # table does not list are passed through unchanged; a plain
    # label_transl[item] raised KeyError for them, e.g. when the output
    # contained "LOC" but happened to contain no "PER".
    predictions = [label_transl.get(item, item) for item in predictions]
else:
    # Strip underscores so the "_" separator placeholder becomes "".
    predictions = [item.replace('_', '') for item in predictions]


predictions = functions.transform_to_BIO(predictions)

In [None]:
# Token-level table: the token, its gold label, and this run's prediction in
# a column named after the attempt.
prediction_comparison = pd.DataFrame({"TOKEN": tokens, "NER": labels, attempt: predictions})

In [None]:
# Sanity check: number of rows (tokens plus sentence separators) in the table.
len(prediction_comparison)

162909

In [None]:
# Fingerprint of the new table (tokens from its first and last rows); compared
# against the stored table before a prediction column is appended to it.
pred_checksum = create_checksum(prediction_comparison)

In [None]:
# Append this run's predictions to the shared comparison table for the test
# split, or create that table if it does not exist yet.
comparison_path = f'{path}/model_comparisons/test-predictions_comparison.tsv'

if not os.path.isfile(comparison_path):
    prediction_comparison.to_csv(comparison_path, sep='\t', index=False)
    logger.info(f"No predictions table was found at location {comparison_path}, so a new one was created.")
else:
    pred_comp_df = pd.read_csv(comparison_path, sep='\t')
    file_checksum = create_checksum(pred_comp_df)
    if file_checksum != pred_checksum:
        logger.error("The checksums for the loaded test data and the predictions table do not match. Maybe the train/test/dev split has changed?")
        raise Exception("The checksums for the loaded test data and the predictions table do not match. Maybe the train/test/dev split has changed?")
    # The checksum only looks at a few rows at each end, so check the row
    # count explicitly instead of failing inside pandas on assignment.
    if len(pred_comp_df) != len(prediction_comparison):
        length_error = (f"The predictions table at {comparison_path} has {len(pred_comp_df)} rows, "
                        f"but {len(prediction_comparison)} predictions were produced.")
        logger.error(length_error)
        raise Exception(length_error)
    pred_comp_df[attempt] = predictions
    pred_comp_df.to_csv(comparison_path, sep='\t', index=False)
    logger.info("The predictions were added to the predictions table.")

2024-03-25 09:19:27,748 - run_model.log - INFO - The predictions were added to the predictions table.
INFO:run_model.log:The predictions were added to the predictions table.
