In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification, TFTrainer, TFTrainingArguments

### Load dataset and train-test split

In [3]:
class TrainMLModels:
    '''Data-preparation helpers for training claim-verification models.'''

    # Binary target encoding; any label not listed here is excluded from the data.
    _LABEL_TO_INT = {'SUPPORTS': 1, 'REFUTES': 0}

    def _load_claims_and_labels(self, path):
        '''Read one FEVEROUS JSON-lines file and return (claims, integer labels).

        Rows with a null in any column are removed first (same filter as before).
        Labels are whitespace-stripped and mapped through ``_LABEL_TO_INT``; rows
        whose label is empty, 'NOT ENOUGH INFO' or otherwise unmapped are dropped,
        so the returned labels never contain NaN and are always int64.
        '''
        frame = pd.read_json(path, lines=True).dropna()
        # astype(str) keeps .str usable even if the column was not parsed as text.
        encoded = frame['label'].astype(str).str.strip().map(self._LABEL_TO_INT)
        keep = encoded.notna()
        return frame.loc[keep, 'claim'], encoded[keep].astype('int64')

    def get_train_test_Variables_FEVEROUS(
        self,
        train_path="./data/feverous/feverous_train_challenges.jsonl",
        test_path="./data/feverous/feverous_dev_challenges.jsonl",
    ):
        '''Perform all data preprocessing and return (x_train, y_train, x_test, y_test).

        Parameters
        ----------
        train_path : str
            JSON-lines file used for the training split.
        test_path : str
            JSON-lines file used for the test split.

        Returns
        -------
        tuple of pandas.Series
            ``x_*`` hold the claim text; ``y_*`` hold the label encoded as
            1 for 'SUPPORTS' and 0 for 'REFUTES'.
        '''
        x_train, y_train = self._load_claims_and_labels(train_path)
        x_test, y_test = self._load_claims_and_labels(test_path)

        return (x_train, y_train, x_test, y_test)

In [4]:
# Build the loader once, then unpack the four splits it returns.
feverous_loader = TrainMLModels()
x_train, y_train, x_test, y_test = feverous_loader.get_train_test_Variables_FEVEROUS()

In [5]:
# Intended cap on tokens per encoded claim.
# NOTE(review): not passed to any call in this cell — confirm where it is consumed.
max_seq_length = 512
# Load the tokenizer published with the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [6]:
def encode_claims(claims):
    '''Tokenize an iterable of claim strings with the shared tokenizer.

    Sequences are padded to the longest claim in the batch and truncated to
    ``max_seq_length`` tokens, so the configured limit is applied explicitly
    instead of relying on the tokenizer's implicit default.
    '''
    return tokenizer(
        list(claims),
        truncation=True,
        padding=True,
        max_length=max_seq_length,
    )


train_encodings = encode_claims(x_train)
test_encodings = encode_claims(x_test)

In [7]:
def _as_tf_dataset(encodings, labels):
    '''Slice tokenizer encodings and their labels into a tf.data.Dataset of (features, label) pairs.'''
    return tf.data.Dataset.from_tensor_slices((dict(encodings), labels))


train_dataset = _as_tf_dataset(train_encodings, y_train)
test_dataset = _as_tf_dataset(test_encodings, y_test)

In [8]:
# Collect every training option in one mapping, then unpack it into the
# arguments object so the settings can be inspected on their own.
training_options = dict(
    # Where results and logs are written
    output_dir='./saved_models/bert_feverous/train/results',
    logging_dir='./saved_models/bert_feverous/train/logs',
    # Schedule: epochs and per-device batch sizes
    num_train_epochs=2,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    # Optimisation: learning-rate warmup steps and weight-decay strength
    warmup_steps=500,
    weight_decay=0.01,
    # Step intervals for logging and evaluation
    logging_steps=10,
    eval_steps=10,
    resume_from_checkpoint=True,
)
training_args = TFTrainingArguments(**training_options)

In [9]:

def compute_metrics(eval_prediction):
    '''Return accuracy for one evaluation pass.

    ``eval_prediction`` carries the model logits in ``.predictions`` and the
    reference class ids in ``.label_ids``; the predicted class is the arg-max
    over the last axis of the logits.
    '''
    predicted_labels = np.argmax(eval_prediction.predictions, axis=-1)
    return {'accuracy': float(np.mean(predicted_labels == eval_prediction.label_ids))}


# The model has to be created inside the distribution strategy's scope.
with training_args.strategy.scope():
    model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased")

trainer = TFTrainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=test_dataset,           # evaluation dataset
    compute_metrics=compute_metrics      # without this, evaluate() reports no accuracy metric
)



All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# Start training with the trainer built in the previous cell.
trainer.train()

KeyboardInterrupt: 

In [None]:
# Compute evaluation metrics on the test dataset; the result is shown as the cell output.
trainer.evaluate(test_dataset)

In [None]:
# predict() returns (predictions, label_ids, metrics). Index 1 is the ground-truth
# label ids, so the predicted class must come from the logits in `.predictions`.
prediction_output = trainer.predict(test_dataset)
output = np.argmax(prediction_output.predictions, axis=-1)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of y_test against `output`; the bare name displays it as the cell result.
cm = confusion_matrix(y_true=y_test, y_pred=output)
cm

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
# Persist the trainer's model to the directory below.
trained_model_dir = './saved_models/bert_feverous/trained_model'
trainer.save_model(trained_model_dir)

In [None]:
# Evaluate the model
eval_result = trainer.evaluate()
# 'eval_accuracy' is only reported when the trainer was given a compute_metrics
# callback; otherwise derive accuracy directly from the predicted logits so this
# cell does not fail with a KeyError.
if 'eval_accuracy' in eval_result:
    accuracy = eval_result['eval_accuracy']
else:
    eval_predictions = trainer.predict(test_dataset)
    predicted_labels = np.argmax(eval_predictions.predictions, axis=-1)
    accuracy = float(np.mean(predicted_labels == eval_predictions.label_ids))
print(f"Accuracy: {accuracy:.2f}")
