# Data Process
For each comment, if more than half peple lable it as the same value, then we select it.Otherwise, we discard it.

In [52]:
import csv
import pandas as pd

def process_rating(rating_str):
    rating_str = str(rating_str).strip()
    labels = list(map(int, rating_str.split('/')))
    
    # If all ratings are the same, return that value directly
    if len(set(labels)) == 1:
        return labels[0]

    total_votes = len(labels)
    count_1 = labels.count(1)
    count_0 = labels.count(0)
    count_minus1 = labels.count(-1)
    
    # Determine if any label is in the majority (> half)
    if count_1 > total_votes / 2:
        return 1
    elif count_0 > total_votes / 2:
        return 0
    elif count_minus1 > total_votes / 2:
        return -1
    else:
        # In case of a tie, return -1
        return -1
# Read the entire TSV file using tab as the separator
df = pd.read_csv('a3_train_final.tsv', header=None, sep='\t')
# Process the first column (index 0) to generate the consensus label
df['cleaned_rating'] = df.iloc[:, 0].apply(process_rating)

print(count)
# Display the cleaned DataFrame
print(df.head(5))

# save data
df.to_csv('cleaned_data.tsv', sep='\t', index=False)

negative_count = (df['cleaned_rating'] != -1).sum()
print("Number of 0/1 labels:", negative_count)

# Electra Model
## Train on the trainning data

In [5]:
import pandas as pd
import torch
import numpy as np
from transformers import ElectraTokenizer, ElectraForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset
import os

os.environ["WANDB_DISABLED"] = "true"

file_path = "/content/drive/MyDrive/a3_train_consis.csv"
df = pd.read_csv(file_path, skiprows=1)

model_name = "google/electra-base-discriminator"
tokenizer = ElectraTokenizer.from_pretrained(model_name)

class CommentDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length=128):
        self.dataframe = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        comment = str(self.dataframe.iloc[idx]["Comment"])
        label = int(self.dataframe.iloc[idx]["value"])

        encoding = self.tokenizer(
            comment,
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(label, dtype=torch.long)
        }

train_dataset = CommentDataset(df, tokenizer)

model = ElectraForSequenceClassification.from_pretrained(model_name, num_labels=2)

training_args = TrainingArguments(
    output_dir="./final_model1",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="no",
    save_strategy="epoch",
    load_best_model_at_end=False,
    logging_dir="./logs",
    fp16=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

trainer.train()

trainer.save_model("/content/drive/MyDrive/final_model")


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Step,Training Loss
500,0.357
1000,0.181
1500,0.0848


## Test on the test set

In [7]:
from torch.nn.functional import softmax
from sklearn.metrics import classification_report
file_path = "/content/drive/MyDrive/a3_test.tsv"
df = pd.read_csv(file_path, sep="\t", header=None, names=["label", "comment"])
best_model = ElectraForSequenceClassification.from_pretrained("/content/drive/MyDrive/final_model")
best_model = best_model.to(trainer.model.device)
correct = 0
total = len(df)
predictions = []
true_labels = []
for _, row in df.iterrows():
    label = int(row["label"])
    comment = row["comment"]

    inputs = tokenizer(comment, return_tensors="pt", truncation=True, padding=True).to(trainer.model.device)

    with torch.no_grad():
        outputs = best_model(**inputs)

    logits = outputs.logits
    probs = softmax(logits, dim=-1)
    predicted_label = torch.argmax(probs, dim=-1).item()
    predictions.append(predicted_label)
    true_labels.append(label)
    if predicted_label == label:
        correct += 1

accuracy = correct / total
print(f"accuracy on test set: {accuracy:.4f}")

print("\nClassification Report:")
print(classification_report(true_labels, predictions))

accuracy on test set: 0.9251

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.90      0.92       267
           1       0.91      0.95      0.93       267

    accuracy                           0.93       534
   macro avg       0.93      0.93      0.93       534
weighted avg       0.93      0.93      0.93       534



# Bert model
## Train on the trainning data

In [4]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score, classification_report
import time
import datetime
import torch.optim as optim
from tqdm.auto import tqdm
data_path = 'a3_train_consis_cleaned.csv'
df = pd.read_csv(data_path)
selected_df = df.iloc[:, [1, -1]]
selected_df.columns = ['text', 'label']
from sklearn.model_selection import train_test_split
train_df, val_df = train_test_split(selected_df, test_size=0.2, random_state=42)

class CommentDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=128):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        text = str(self.data.iloc[index]['text'])
        label = int(self.data.iloc[index]['label'])
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

#BERT tokenizer
MODEL_NAME = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
train_dataset = CommentDataset(train_df, tokenizer, max_len=128)
val_dataset = CommentDataset(val_df, tokenizer, max_len=128)

#DataLoader
BATCH_SIZE = 16

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# pre-trained BERT MODEL
model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)

# Training function
def format_time(elapsed):
    return str(datetime.timedelta(seconds=int(round(elapsed))))

def train_epoch(model, dataloader, optimizer, scheduler):
    model.train()
    total_loss = 0
    # tqdm: display the process of training
    progress_bar = tqdm(dataloader, desc="Training", leave=False)
    for batch in progress_bar:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)
        
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
    
        progress_bar.set_postfix(loss=loss.item())
    avg_loss = total_loss / len(dataloader)
    return avg_loss
# evaluation
def eval_model(model, dataloader):
    model.eval()
    predictions, true_labels = [], []
    total_loss = 0
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()
            
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1).flatten()
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
    avg_loss = total_loss / len(dataloader)
    acc = accuracy_score(true_labels, predictions)
    return avg_loss, acc, predictions, true_labels
# superparameter setting
EPOCHS = 4
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
total_steps = len(train_loader) * EPOCHS

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)

# training 
print("Training start！")
for epoch in range(EPOCHS):
    print(f"======== Epoch {epoch+1} / {EPOCHS} ========")
    t0 = time.time()
    
    train_loss = train_epoch(model, train_loader, optimizer, scheduler)
    training_time = format_time(time.time() - t0)
    print(f"Training loss: {train_loss:.3f}，time: {training_time}")

    val_loss, val_acc, _, _ = eval_model(model, val_loader)
    print(f"valuation loss: {val_loss:.3f}，accuracy: {val_acc:.3f}")

print("Training complected！")
# Evaluation report
_, _, preds, true_labels = eval_model(model, val_loader)
print("Report:")
print(classification_report(true_labels, preds, target_names=['Anti-Vaccine', 'Pro-Vaccine']))

# Save model for next time using 
model_save_path = 'bert_vaccine_classifier_m1.pt'
torch.save(model.state_dict(), model_save_path)
print(f"The model has been saved as {model_save_path}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda
Training start！


Training:   0%|          | 0/480 [00:00<?, ?it/s]

Training loss: 0.421，time: 0:01:41
valuation loss: 0.313，accuracy: 0.871


Training:   0%|          | 0/480 [00:00<?, ?it/s]

Training loss: 0.194，time: 0:01:41
valuation loss: 0.371，accuracy: 0.885


Training:   0%|          | 0/480 [00:00<?, ?it/s]

Training loss: 0.096，time: 0:01:40
valuation loss: 0.468，accuracy: 0.896


Training:   0%|          | 0/480 [00:00<?, ?it/s]

Training loss: 0.051，time: 0:01:41
valuation loss: 0.544，accuracy: 0.894
Training complected！
Report:
              precision    recall  f1-score   support

Anti-Vaccine       0.90      0.88      0.89       928
 Pro-Vaccine       0.89      0.91      0.90       991

    accuracy                           0.89      1919
   macro avg       0.89      0.89      0.89      1919
weighted avg       0.89      0.89      0.89      1919

The model has been saved as bert_vaccine_classifier_m1.pt


## Test on the test set

In [6]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

class CommentDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len=128):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        text = str(self.data.iloc[index]['text'])
        label = int(self.data.iloc[index]['label']) if 'label' in self.data.columns else -1
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_NAME = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
model_save_path = 'bert_vaccine_classifier_m1.pt'  
model.load_state_dict(torch.load(model_save_path, map_location=device))
model.to(device)
model.eval()
#load data 
test_df = pd.read_csv('a3_test.tsv', header=None,sep='\t')
test_df.columns = ['label','text']

test_dataset = CommentDataset(test_df, tokenizer, max_len=128)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

predictions = []
with torch.no_grad():
    for batch in test_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        probs = F.softmax(logits, dim=1)
        preds = torch.argmax(probs, dim=1).cpu().numpy()
        predictions.extend(preds)
        
true_labels = test_df['label'].values
correct = (true_labels == predictions).sum()
accuracy = correct / len(true_labels)
print(f"accuracy in separate test set=: {accuracy*100:.2f}%")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


accuracy in separate test set=: 88.95%


# XLNet model
## Train on the trainning data

In [None]:
from transformers import XLNetTokenizer, XLNetForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from datasets import load_dataset
import os
os.environ["WANDB_DISABLED"] = "true"
# load dataset
raw_dataset = load_dataset("csv", data_files="/content/drive/MyDrive/vaccine_comments.csv")["train"]

dataset = raw_dataset.train_test_split(test_size=0.2)
train_ds = dataset["train"]
test_ds = dataset["test"]

# Loading the Segmenter and Pre-training Model
tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=2)

def tokenize_function(examples):
    texts = [str(t) for t in examples["Comment"]]
    return tokenizer(texts, truncation=True, padding="max_length", max_length=128)

tokenized_train_ds = train_ds.map(tokenize_function, batched=True)
tokenized_test_ds = test_ds.map(tokenize_function, batched=True)

def rename_label(example):
    example["labels"] = example["value"]
    return example

tokenized_train_ds = tokenized_train_ds.map(rename_label)
tokenized_test_ds = tokenized_test_ds.map(rename_label)

tokenized_train_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
tokenized_test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# Configuring Training Parameters
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/results",          
    evaluation_strategy="epoch",     
    save_strategy="epoch",                       
    metric_for_best_model="eval_loss",        
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,            
    weight_decay=0.01
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_ds,
    eval_dataset=tokenized_test_ds,
    tokenizer=tokenizer,
)

trainer.train()

# save the model
model.save_pretrained("/content/drive/MyDrive/second/fine_tuned_xlnet")
tokenizer.save_pretrained("/content/drive/MyDrive/second/fine_tuned_xlnet")


Generating train split: 0 examples [00:00, ? examples/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/7676 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/467M [00:00<?, ?B/s]

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Map:   0%|          | 0/7676 [00:00<?, ? examples/s]

Map:   0%|          | 0/1919 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.5178,0.392351
2,0.3184,0.392699
3,0.1732,0.473349
4,0.1,0.509924
5,0.0645,0.524144


('/content/drive/MyDrive/second/fine_tuned_xlnet/tokenizer_config.json',
 '/content/drive/MyDrive/second/fine_tuned_xlnet/special_tokens_map.json',
 '/content/drive/MyDrive/second/fine_tuned_xlnet/spiece.model',
 '/content/drive/MyDrive/second/fine_tuned_xlnet/added_tokens.json')

## Test on the test set

In [None]:
import pandas as pd
import torch
from transformers import XLNetTokenizer, XLNetForSequenceClassification
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the trained model
tokenizer = XLNetTokenizer.from_pretrained("/content/drive/MyDrive/second/fine_tuned_xlnet")
model = XLNetForSequenceClassification.from_pretrained("/content/drive/MyDrive/second/fine_tuned_xlnet")
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

df = pd.read_csv("/content/drive/MyDrive/predictions.csv") 

def predict_text(text):
    inputs = tokenizer(text, truncation=True, padding="max_length", max_length=128, return_tensors="pt")
    inputs = {key: val.to(device) for key, val in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_label = torch.argmax(outputs.logits, dim=1).item()
    return predicted_label

df["predicted"] = df["Comment"].apply(lambda x: predict_text(x))

y_true = df["value"]
y_pred = df["predicted"]

accuracy = accuracy_score(y_true, y_pred)
report = classification_report(y_true, y_pred)
conf_matrix = confusion_matrix(y_true, y_pred)

print("Accuracy: {:.4f}".format(accuracy))
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)

df.to_csv("/content/drive/MyDrive/validation_with_predictions.csv", index=False)


Accuracy: 0.9101
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.88      0.91       267
           1       0.89      0.94      0.91       267

    accuracy                           0.91       534
   macro avg       0.91      0.91      0.91       534
weighted avg       0.91      0.91      0.91       534

Confusion Matrix:
 [[235  32]
 [ 16 251]]


# Merge three models  predict results

In [5]:
import pandas as pd
df_xlnet = pd.read_csv('/content/drive/MyDrive/validation_with_predictions.csv')
df_bert = pd.read_csv('/content/drive/MyDrive/test_predictions.tsv', sep='\t')
df_ele = pd.read_csv('/content/drive/MyDrive/a3_train_with_predictions.tsv', sep='\t')
df_ele.loc[-1]=[ 0, "test", 0]
df_ele = df_ele.sort_index().reset_index(drop=True)
df = pd.DataFrame(columns=['label', 'text', 'bert', 'xlnet', 'electra'])
df['label'] = df_bert['label']
df['text'] = df_bert['text']
df['bert'] = df_bert['predicted_label']
df['xlnet'] = df_xlnet['predicted']
df['electra'] = df_ele['0.1']
print(df.head())
df.to_csv('result.csv', index=False)
df.to_excel('result.xlsx', index=False)

   label                                               text  bert  xlnet  \
0      0  Extremely rare is only good if it doesn't happ...     0      1   
1      1  I have two parents in their 70s. Both had the ...     1      0   
2      0  Not getting vaccinated is still more dangerous...     0      1   
3      1  The average life expectancy of a human is 74 y...     1      1   
4      1  Trust the science is a dumb saying. Science is...     1      1   

   electra  
0        0  
1        1  
2        1  
3        1  
4        1  


# Ensemble Model：Majority vote

In [None]:
import pandas as pd

df = pd.read_csv('/content/drive/MyDrive/result.csv')

bert_accuracy = (df["bert"] == df["label"]).mean()
xlnet_accuracy = (df["xlnet"] == df["label"]).mean()
electra_accuracy = (df["electra"] == df["label"]).mean()

print(f"BERT Accuracy:    {bert_accuracy:.4f}")
print(f"XLNet Accuracy:   {xlnet_accuracy:.4f}")
print(f"ELECTRA Accuracy: {electra_accuracy:.4f}")

# majority vote
df["vote_sum"] = df["bert"] + df["xlnet"] + df["electra"]
df["ensemble_pred"] = (df["vote_sum"] >= 2).astype(int)

# computer the accuracy 
ensemble_accuracy = (df["ensemble_pred"] == df["label"]).mean()
print(f"Ensemble Accuracy (Majority Vote): {ensemble_accuracy:.4f}")


BERT Accuracy:    0.9045
XLNet Accuracy:   0.8970
ELECTRA Accuracy: 0.9419
Ensemble Accuracy (Majority Vote): 0.9345


# Final result

In [7]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
df = pd.read_csv('/content/drive/MyDrive/result.csv')
df.head()

Unnamed: 0,label,text,bert,xlnet,electra
0,0,Extremely rare is only good if it doesn't happ...,0,0,0
1,1,I have two parents in their 70s. Both had the ...,1,1,1
2,0,Not getting vaccinated is still more dangerous...,0,1,1
3,1,The average life expectancy of a human is 74 y...,1,1,1
4,1,Trust the science is a dumb saying. Science is...,1,1,1


In [8]:
df["vote_sum"] = df["bert"] + df["xlnet"] + df["electra"]
df["ensemble_pred"] = (df["vote_sum"] >= 2).astype(int)
y_pred = df["ensemble_pred"]
y_true = df["label"]
accuracy = accuracy_score(y_true, y_pred)
report = classification_report(y_true, y_pred)
conf_matrix = confusion_matrix(y_true, y_pred)
print("Accuracy: {:.4f}".format(accuracy))
print("Classification Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.9345
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.91      0.93       267
           1       0.92      0.96      0.94       267

    accuracy                           0.93       534
   macro avg       0.94      0.93      0.93       534
weighted avg       0.94      0.93      0.93       534

Confusion Matrix:
 [[244  23]
 [ 12 255]]


In [9]:

# Print the comments that predict wrong.
for index, row in df.iterrows():
    if row['label'] != row['ensemble_pred']:
        print(f"Index: {index}, Text: {row['text']}")
        print(f"  True Label: {row['label']}")
        print(f"  Predicted Label: {row['ensemble_pred']}")
        print(f"  Bert Prediction: {row['bert']}")
        print(f"  Xlnet Prediction: {row['xlnet']}")
        print(f"  Electra Prediction: {row['electra']}")
        print("-" * 20)


Index: 2, Text: Not getting vaccinated is still more dangerous. Uh-huh. Right.
  True Label: 0
  Predicted Label: 1
  Bert Prediction: 0
  Xlnet Prediction: 1
  Electra Prediction: 1
--------------------
Index: 18, Text: Advance R.I.P. to those who will take covid vaccines.
  True Label: 0
  Predicted Label: 1
  Bert Prediction: 1
  Xlnet Prediction: 1
  Electra Prediction: 1
--------------------
Index: 21, Text: Ah crap I got polio. BuT AtlEaST i dOnT haVE AutIsM
  True Label: 1
  Predicted Label: 0
  Bert Prediction: 0
  Xlnet Prediction: 0
  Electra Prediction: 1
--------------------
Index: 23, Text: All of the anti-vaxxers out there I've got one message for all of you I salute you keep up the good work
  True Label: 0
  Predicted Label: 1
  Bert Prediction: 1
  Xlnet Prediction: 1
  Electra Prediction: 1
--------------------
Index: 87, Text: Got vaccinated today and all my body’s aching like hell!!
  True Label: 0
  Predicted Label: 1
  Bert Prediction: 1
  Xlnet Prediction: 1
  El