In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# stored on Drive can be read and written like local files.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import sys
import os

# Root folder of the project on the mounted Google Drive. The log file, the
# predictions table and the results table are all addressed relative to it.
path = "/content/drive/MyDrive/NLP_Project_New"

# Make the project folder importable. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate entries in sys.path.
_project_root = os.path.abspath(path)
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [None]:
import pandas as pd
from datetime import datetime
import os.path

import logging

# Log file for this notebook, stored in the project's logs/ folder.
logfile = f"{path}/logs/evaluate_model.log"
# logging.FileHandler does not create missing directories, so make sure the
# target folder exists before opening the file.
os.makedirs(os.path.dirname(logfile), exist_ok=True)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

logger = logging.getLogger("evaluate_model.log")
logger.setLevel(logging.INFO)
# Keep records away from the root logger; otherwise every message is emitted a
# second time by whatever handler the hosting environment installed on root.
logger.propagate = False

# logging.getLogger returns the same object on every call, so handlers attached
# by an earlier run of this cell are still present. Detach and close them to
# avoid duplicated log lines and leaked file handles.
for _stale_handler in list(logger.handlers):
    logger.removeHandler(_stale_handler)
    _stale_handler.close()

# Persist INFO-and-above records to the log file ...
file_handler = logging.FileHandler(logfile)
file_handler.setFormatter(formatter)
file_handler.setLevel(logging.INFO)

# ... and mirror them in the notebook output (stderr).
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
stream_handler.setLevel(logging.INFO)

logger.addHandler(file_handler)
logger.addHandler(stream_handler)

In [None]:
# Numeric ids of the entity tags this evaluation distinguishes. Any other value
# is treated as "no entity" and mapped to 0 by helper().
_TAG_IDS = {"B-PER": 1, "I-PER": 2, "B-LOC": 3, "I-LOC": 4}


def helper(x: str) -> int:
    """Map a NER tag string to its numeric id.

    Args:
        x: Tag to convert, e.g. "B-PER" or "I-LOC".

    Returns:
        1 for "B-PER", 2 for "I-PER", 3 for "B-LOC", 4 for "I-LOC" and 0 for
        every other value (including non-string values such as NaN or None).
    """
    try:
        return _TAG_IDS.get(x, 0)
    except TypeError:
        # Unhashable values can never equal one of the tag strings.
        return 0

In [None]:
# Capture the moment this evaluation session starts; the formatted stamp is
# stored with the evaluation results later on.
now = datetime.now()
timestamp = now.strftime("%m-%d-%H-%M-%S")

# Tab-separated predictions table: the "NER" column is compared against one
# prediction column per attempt further down.
test_pred_path = f'{path}/model_comparisons/smaller_test-predictions_comparison_new.tsv'
df = pd.read_csv(test_pred_path, delimiter="\t")

logger.info(
    "Initialised model evaluation. Predictions table loaded from location "
    f"{test_pred_path}"
)


2024-03-27 18:30:02,659 - evaluate_model.log - INFO - Initialised model evaluation. Predictions table loaded from location /content/drive/MyDrive/NLP_Project_New/model_comparisons/smaller_test-predictions_comparison_new.tsv
INFO:evaluate_model.log:Initialised model evaluation. Predictions table loaded from location /content/drive/MyDrive/NLP_Project_New/model_comparisons/smaller_test-predictions_comparison_new.tsv


In [None]:
# Print every attempt column (all columns after the first two) together with
# the model name and the date that are encoded in the column name.
print("The available attempts are:\n")
for attempt_name in df.columns[2:]:
    # The trailing 14 characters are the date stamp; the 2 characters before
    # it separate the stamp from the model name.
    model_part, date_part = attempt_name[:-16], attempt_name[-14:]
    print(f"Name: {attempt_name}\t(Model: {model_part}, Date: {date_part})")

The available attempts are:

Name: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_2_part--03-27-17-33-57	(Model: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_2_part, Date: 03-27-17-33-57)
Name: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_4_part--03-27-17-33-57	(Model: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_4_part, Date: 03-27-17-33-57)
Name: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_8_part--03-27-17-33-57	(Model: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_8_part, Date: 03-27-17-33-57)
Name: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_16_part--03-27-17-33-57	(Model: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_16_part, Date: 03-27-17-33-57)
Name: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_32_part--03-27-17-33-57	(Model: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_32_part, Date: 03-27-17-33-57)
Name: /content/drive/M

In [None]:
# Ask which attempt column should be evaluated. The name is typically pasted
# in, so surrounding whitespace is stripped before it is compared with the
# attempt columns (all columns after the first two).
attempt = input("Please enter the name of the attempt you want to evaluate.").strip()
if attempt in df.columns[2:]:
    logger.info(f"Chosen attempt to evaluate: {attempt}")
else:
    logger.error(f"The chosen attempt name ({attempt}) does not match the attempts in the predictions table!")
    # ValueError is a subclass of Exception, so code catching Exception still works.
    raise ValueError("The attempt entered does not match the attempts in the predictions table!")

Please enter the name of the attempt you want to evaluate./content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_64_part--03-27-17-33-57


2024-03-27 18:38:49,000 - evaluate_model.log - INFO - Chosen attempt to evaluate: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_64_part--03-27-17-33-57
INFO:evaluate_model.log:Chosen attempt to evaluate: /content/drive/MyDrive/NLP_Project_New/models/bert_finetuned_64_part--03-27-17-33-57


In [None]:
# Take an explicit copy of the two columns being compared. Writing into a bare
# slice of `df` is what triggers pandas' SettingWithCopyWarning; with a copy,
# the conversion below neither warns nor touches the original table.
df_numeric = df[["NER", attempt]].copy()

# Replace the tag strings in both columns with their numeric ids.
df_numeric["NER"] = df_numeric["NER"].apply(helper)
df_numeric[attempt] = df_numeric[attempt].apply(helper)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_numeric.loc[:,"NER"] = df_numeric["NER"].apply(helper)
  df_numeric.loc[:,"NER"] = df_numeric["NER"].apply(helper)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_numeric.loc[:,attempt] = df_numeric[attempt].apply(helper)
  df_numeric.loc[:,attempt] = df_numeric[attempt].apply(helper)


In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Row-wise comparison of the numeric "NER" ids with the chosen attempt's ids,
# done on whole columns instead of a Python-level loop over the rows.
gold_ids = df_numeric['NER']
pred_ids = df_numeric[attempt]

# NOTE(review): the comparison is on the numeric ids, so a row where both
# columns are non-zero but different counts as a false positive or a false
# negative depending on which id is larger -- confirm this is intended.
false_positives = int((gold_ids < pred_ids).sum())
false_negatives = int((gold_ids > pred_ids).sum())
true_positives = int(((gold_ids == pred_ids) & (gold_ids > 0)).sum())
# Every remaining row has equal ids that are not positive.
true_negatives = len(df_numeric) - true_positives - false_positives - false_negatives

print(f"True Positives: {true_positives}, False Positives: {false_positives}, True Negatives: {true_negatives}, False Negatives: {false_negatives}")

correct_predictions = true_positives + true_negatives
false_predictions = false_positives + false_negatives
all_predictions = correct_predictions + false_predictions

# A metric whose denominator is zero (e.g. the attempt never predicts an
# entity, or the table is empty) is reported as 0.0 instead of raising
# ZeroDivisionError and aborting the evaluation.
precision = _safe_ratio(true_positives, true_positives + false_positives)
recall = _safe_ratio(true_positives, true_positives + false_negatives)
accuracy = _safe_ratio(correct_predictions, all_predictions)
f1 = _safe_ratio(2 * precision * recall, precision + recall)

print(f"True Positives:\t\t{true_positives}\nTrue Negatives:\t\t{true_negatives}\nFalse Positives:\t{false_positives}\nFalse Negatives:\t{false_negatives}\n")
print(f"Correct predictions:\t{correct_predictions}\nFalse predictions:\t{false_predictions}\nAll predictions:\t{all_predictions}\n")
print(f"Therefore accuracy:\t{correct_predictions} ÷ {all_predictions} = {accuracy}\n")

logger.info(f"EVALUATION RESULTS:\nPrecision:\t\t{precision}\nRecall:\t\t\t{recall}\nAccuracy:\t\t{accuracy}\nF1-score:\t\t{f1}\n")

2024-03-27 18:39:03,379 - evaluate_model.log - INFO - EVALUATION RESULTS:
Precision:		0.011811023622047244
Recall:			0.00293733681462141
Accuracy:		0.9766249869559079
F1-score:		0.004704652378463146

INFO:evaluate_model.log:EVALUATION RESULTS:
Precision:		0.011811023622047244
Recall:			0.00293733681462141
Accuracy:		0.9766249869559079
F1-score:		0.004704652378463146



True Positives: 9, False Positives: 753, True Negatives: 159092, False Negatives: 3055
True Positives:		9
True Negatives:		159092
False Positives:	753
False Negatives:	3055

Correct predictions:	159101
False predictions:	3808
All predictions:	162909

Therefore accuracy:	159101 ÷ 162909 = 0.9766249869559079



In [None]:
# Optional free-text note about the attempt being evaluated (entered
# interactively); it is kept in the `comments` variable for later use.
comments = input("Add comments about this attempt here.")

Add comments about this attempt here.bert model fine tuned on 1/64 of training data


In [None]:
# One record describing this evaluation run: which attempt was evaluated, when,
# the computed metrics and the free-text comment.
eval_results = {
    'attempt': attempt,
    'time': timestamp,
    # The attempt name without its trailing 16 characters (separator + date stamp).
    'model_name': attempt[:-16],
    'precision': precision,
    'recall': recall,
    'accuracy': accuracy,
    'F1-score': f1,
    'comments': comments,
}

In [None]:
# CSV file that accumulates one row of evaluation results per evaluated attempt.
savepath = f'{path}/model_comparisons/smaller_model_results_new.csv'

# The new results as a one-row frame. DataFrame.append was deprecated and then
# removed in pandas 2.0, so rows are combined with pd.concat instead.
result_columns = ['attempt', 'time', 'model_name', 'precision', 'recall', 'accuracy', 'F1-score', 'comments']
new_result_row = pd.DataFrame([eval_results], columns=result_columns)

if os.path.isfile(savepath):
    # A results table already exists: add the new row at the end.
    model_comp_df = pd.concat([pd.read_csv(savepath), new_result_row], ignore_index=True)
    model_comp_df.to_csv(savepath, index=False)
    logger.info(f"Appended evaluation results to table at location {savepath}.")
else:
    # No results table yet: the new row becomes the whole table.
    model_comp_df = new_result_row
    model_comp_df.to_csv(savepath, index=False)
    logger.info(f"Couldn't find a table for evaluation results, so I created one at location {savepath}")

  model_comp_df = model_comp_df.append(eval_results, ignore_index=True)
2024-03-27 18:39:29,045 - evaluate_model.log - INFO - Appended evaluation results to table at location /content/drive/MyDrive/NLP_Project_New/model_comparisons/smaller_model_results_new.csv.
INFO:evaluate_model.log:Appended evaluation results to table at location /content/drive/MyDrive/NLP_Project_New/model_comparisons/smaller_model_results_new.csv.


In [None]:
# Display the first rows (at most 10) of the results table as a quick check.
model_comp_df.head(10)

Unnamed: 0,attempt,time,model_name,precision,recall,accuracy,F1-score,comments
0,/content/drive/MyDrive/NLP_Project_New/models/...,03-27-18-30-01,/content/drive/MyDrive/NLP_Project_New/models/...,0.773138,0.592739,0.98919,0.671026,bert model fine tuned on half the training data
1,/content/drive/MyDrive/NLP_Project_New/models/...,03-27-18-30-01,/content/drive/MyDrive/NLP_Project_New/models/...,0.765071,0.568511,0.988705,0.652305,bert model fine tuned on 1/4 the training data
2,/content/drive/MyDrive/NLP_Project_New/models/...,03-27-18-30-01,/content/drive/MyDrive/NLP_Project_New/models/...,0.780526,0.497374,0.987987,0.60758,bert model fine tuned on 1/8 the training data
3,/content/drive/MyDrive/NLP_Project_New/models/...,03-27-18-30-01,/content/drive/MyDrive/NLP_Project_New/models/...,0.603212,0.427376,0.984341,0.500294,bert model fine tuned on 1/16 of training data
4,/content/drive/MyDrive/NLP_Project_New/models/...,03-27-18-30-01,/content/drive/MyDrive/NLP_Project_New/models/...,0.011811,0.002937,0.976625,0.004705,bert model fine tuned on 1/64 of training data
