In [None]:
import os
import csv
import pandas as pd
import numpy as np
from tqdm import tqdm
import wandb
import logging

import pickle
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score

from sklearn.model_selection import train_test_split

# Integer stance ids, matching the mapping used by `fnc` below:
# 0 = agree, 1 = disagree, 2 = discuss, 3 = unrelated.
LABELS = [0, 1, 2, 3]
# Stance ids treated as "related" by `fnc_score_cm` (everything except 3).
RELATED = [0, 1, 2]

def fnc(path_headlines, path_bodies):
    """Load an FNC-style stance dataset from its two CSV files.

    Args:
        path_headlines: Path to a CSV with a header row and the columns
            ``Headline, Body ID, Stance``.
        path_bodies: Path to a CSV with a header row and the columns
            ``Body ID, articleBody``.

    Returns:
        Three parallel lists ``(headlines, bodies, labels)``. ``labels`` holds
        integer stance ids (0=agree, 1=disagree, 2=discuss, 3=unrelated).
        Rows with an unknown stance, a body id that is missing from the bodies
        file, or too few columns are skipped.

    Raises:
        ValueError: If a body id cannot be parsed as an integer.
    """
    # Named `stance_to_id` rather than `map` so the builtin is not shadowed.
    stance_to_id = {'agree': 0, 'disagree': 1, 'discuss': 2, 'unrelated': 3}

    # newline='' is what the csv module asks for, so that line breaks inside
    # quoted fields (article bodies) are handled by the reader itself.
    with open(path_bodies, encoding='utf_8', newline='') as fb:  # Body ID,articleBody
        body_rows = list(csv.reader(fb))

    body_dict = {}
    for row in tqdm(body_rows[1:], ncols=80, leave=False):  # [1:] skips the header
        if len(row) < 2:
            continue  # blank or truncated line
        body_dict[int(row[0].strip())] = row[1]

    with open(path_headlines, encoding='utf_8', newline='') as fh:  # Headline,Body ID,Stance
        stance_rows = list(csv.reader(fh))

    h, b, l = [], [], []
    for row in tqdm(stance_rows[1:], ncols=80, leave=False):  # [1:] skips the header
        if len(row) < 3:
            continue  # blank or truncated line
        body_id = int(row[1].strip())
        label = row[2].strip()
        if label in stance_to_id and body_id in body_dict:
            h.append(row[0])
            # Look up the *stripped* label: the membership test above uses it,
            # so indexing with the raw field raised KeyError on padded values.
            l.append(stance_to_id[label])
            b.append(body_dict[body_id])
    return h, b, l

data_dir = ''
# Fixed seed for the train/validation split so that a Restart & Run All (and
# every sweep trial started from a fresh kernel) sees the same partition.
SPLIT_SEED = 4

# --- Training data: split into train / validation -------------------------
headlines, bodies, labels = fnc(
    os.path.join(data_dir, 'train_stances.csv'),
    os.path.join(data_dir, 'train_bodies.csv')
)

list_of_tuples = list(zip(headlines, bodies, labels))
df = pd.DataFrame(list_of_tuples, columns=['text_a', 'text_b', 'labels'])
train_df, val_df = train_test_split(df, random_state=SPLIT_SEED)
# val_df['labels'] is already a Series, so no extra pd.Series wrapper is needed.
labels_val = val_df['labels'].to_numpy()

# --- Competition test data ------------------------------------------------
headlines, bodies, labels = fnc(
    os.path.join(data_dir, 'competition_test_stances.csv'),
    os.path.join(data_dir, 'competition_test_bodies.csv')
)

list_of_tuples = list(zip(headlines, bodies, labels))
test_df = pd.DataFrame(list_of_tuples, columns=['text_a', 'text_b', 'labels'])
labels_test = test_df['labels'].to_numpy()



def calculate_f1_scores(y_true, y_predicted):
    """Format macro and per-class F1 scores as a multi-line percentage report.

    Args:
        y_true: Gold stance ids.
        y_predicted: Predicted stance ids.

    Returns:
        A string with one line each for the macro F1 and the F1 of the
        classes 0..3 (agree, disagree, discuss, unrelated), every value shown
        as a percentage with three decimals.
    """
    macro_score = f1_score(y_true, y_predicted, average='macro')
    per_class = f1_score(y_true, y_predicted, average=None, labels=[0, 1, 2, 3])

    # One template per report line, paired below with the score it displays.
    templates = (
        "F1 macro: {:.3f}",
        "F1 agree: {:.3f}",
        "F1 disagree: {:.3f}",
        "F1 discuss: {:.3f}",
        "F1 unrelated: {:.3f}",
    )
    scores = (macro_score, per_class[0], per_class[1], per_class[2], per_class[3])

    return "".join(
        template.format(score * 100) + "% \n"
        for template, score in zip(templates, scores)
    )


def print_confusion_matrix(cm):
    """Print a 4x4 confusion matrix as an ASCII table followed by its accuracy.

    Args:
        cm: Square matrix (sequence of rows) of counts, indexed in the order
            of the module-level ``LABELS``. Accuracy is the diagonal sum
            divided by the sum of all cells.
    """
    row_template = "|{:^11}|{:^11}|{:^11}|{:^11}|{:^11}|"
    header = row_template.format('', *LABELS)
    rule = "-" * len(header)

    out = ['CONFUSION MATRIX:', rule, header, rule]
    correct = 0
    seen = 0
    for idx, counts in enumerate(cm):
        correct += counts[idx]  # diagonal cell
        seen += sum(counts)
        out.append(row_template.format(LABELS[idx], *counts))
        out.append(rule)

    out.append("ACCURACY: {:.3f}".format((correct / seen)*100) + "%")
    print('\n'.join(out))

def fnc_score_cm(predicted_labels, target):
    """Compute the FNC weighted score and a 4x4 confusion matrix.

    Scoring per example: +0.25 when prediction and target match, a further
    +0.50 when that matching class is not 3 (unrelated), and +0.25 whenever
    both prediction and target are in ``RELATED``.

    Args:
        predicted_labels: Iterable of predicted stance ids (0..3).
        target: Iterable of gold stance ids (0..3).

    Returns:
        ``(score, cm)`` where ``cm[predicted][target]`` counts the examples.
    """
    total = 0.0
    matrix = [[0] * 4 for _ in range(4)]

    for pred, gold in zip(predicted_labels, target):
        if pred == gold:
            # 0.25 for any exact match, 0.75 in total for a related-class match.
            total += 0.25 if pred == 3 else 0.75
        if pred in RELATED and gold in RELATED:
            total += 0.25
        matrix[pred][gold] += 1

    return total, matrix

In [None]:
# Never commit API keys to a notebook. Called without `key`, wandb.login()
# picks up the WANDB_API_KEY environment variable (or a previously stored
# login) and otherwise prompts for the key interactively.
# NOTE: the key that used to be hard-coded here is exposed in the notebook's
# history and should be revoked in the W&B account settings.
wandb.login()

In [None]:
# Hyperparameter search space explored by the sweep.
sweep_parameters = {
    "num_train_epochs": {"values": [3, 5, 7]},
    "learning_rate": {"min": 1e-5, "max": 4e-4},
    "dropout": {"values": [0.3, 0.4, 0.5, 0.6, 0.7]},
    "optimizer": {"values": ["AdamW"]},
    "train_batch_size": {"min": 4, "max": 12, "distribution": "int_uniform"},
    "fc_layer_size": {"values": [128, 256, 512]},
}

# NOTE(review): the objective is the *training* loss; confirm this is intended
# rather than a validation metric.
sweep_config = {
    "method": "bayes",  # alternatives: grid, random
    "metric": {"name": "train_loss", "goal": "minimize"},
    "parameters": sweep_parameters,
}

# Register the sweep with W&B; the returned id is what the agent runs against.
sweep_id = wandb.sweep(sweep_config, project="fnc_bert_0116_v1")

# INFO-level logging overall, with the "transformers" logger limited to warnings.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

In [None]:
from simpletransformers.model import TransformerModel
from simpletransformers.classification import ClassificationModel, ClassificationArgs

def train():
    """Run one sweep trial: fine-tune BERT, then report test-set metrics.

    Meant to be handed to ``wandb.agent``; the trial's hyperparameters are
    taken from ``wandb.config``. Reads the module-level ``train_df``,
    ``val_df``, ``test_df`` and ``labels_test``.

    Prints the F1 report, the relative FNC score and a classification report,
    and shows the confusion matrix for the test set. Returns nothing.
    """
    # Initialize a new wandb run
    wandb.init()

    # Create a TransformerModel
    model = TransformerModel('bert', 'bert-base-uncased', num_labels=4, use_cuda=True, sweep_config=wandb.config, args={
        'reprocess_input_data': True,
        'overwrite_output_dir': True,
        'process_count': 10,
        'eval_batch_size': 4,
        'max_seq_length': 512,
        'fp16': True,
        'gradient_accumulation_steps': 1,
        'tensorboard_dir': '',
        'evaluate_during_training': True,
        'manual_seed': 4,
        'use_multiprocessing': True
    })

    # Train the model. Evaluation during training uses the held-out validation
    # split: evaluating on test_df here would leak the test set into
    # hyperparameter selection.
    model.train_model(train_df, eval_df=val_df)

    # Evaluate on the test set once and keep the outputs (this used to run
    # twice, with the first result thrown away).
    _, model_outputs_test, _ = model.eval_model(test_df)

    # Sync wandb
    wandb.join()

    preds_test = np.argmax(model_outputs_test, axis=1)

    # Argument order follows the (y_true, y_predicted) signature.
    print(calculate_f1_scores(labels_test, preds_test))

    # Normalise by the score of a perfect prediction on this very test set
    # instead of a hard-coded constant that silently goes stale when the data
    # changes.
    fnc_score, _ = fnc_score_cm(preds_test, labels_test)
    best_score, _ = fnc_score_cm(labels_test, labels_test)
    relative_score = 100 * fnc_score / best_score if best_score else 0.0
    print("\nRelative FNC Score: {:.3f}".format(relative_score) + "% \n")

    eval_report = classification_report(labels_test, preds_test)
    print('Test report\n', eval_report)

    # sklearn expects (y_true, y_pred): rows are true labels and columns are
    # predictions, which is how ConfusionMatrixDisplay labels its axes.
    # labels=LABELS keeps the matrix 4x4 even if a class never occurs.
    cm = confusion_matrix(labels_test, preds_test, labels=LABELS)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=LABELS)
    disp.plot()
    plt.show()


In [None]:
# Start a sweep agent for the sweep registered above; it calls `train` once
# per trial, with count=15 capping the number of trials.
wandb.agent(sweep_id, train, count=15)