<a href="https://colab.research.google.com/github/vnavya2004/BTP/blob/main/Final_MeanTeacher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from typing_extensions import final
import torch
import pandas as pd
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from tqdm import tqdm
from google.colab import files
from transformers import AutoModelForSequenceClassification

# Load the XLM-RoBERTa tokenizer
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

# Assuming you're using Google Colab and uploaded a file
uploaded = files.upload()

# Read the Excel file
df = pd.read_excel(pd.ExcelFile(list(uploaded.keys())[0]), header=0)
df = df.sample(frac=0.4, random_state=42)
# Tokenize the text data in the dataset
tweets_column = 'tweet'
labels_column = 'label'
NUM_LABELS = len(df[labels_column].unique())
possible_labels = df[labels_column].unique()
label_dict = {possible_label: index for index, possible_label in enumerate(possible_labels)}
df['labels'] = df[labels_column].map(label_dict)

# Split the dataset into labeled (20%), unlabeled (60%), and test (20%) sets
df_labeled, df_temp = train_test_split(df, stratify=df[labels_column], test_size=0.8)
df_unlabeled, df_test = train_test_split(df_temp, stratify=df_temp[labels_column], test_size=0.25)

# Tokenize the labeled data for training
encoded_data_train = tokenizer.batch_encode_plus(
    df_labeled[tweets_column].tolist(),
    add_special_tokens=True,
    return_attention_mask=True,
    pad_to_max_length=True,
    max_length=256,
    return_tensors='pt'
)

input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(df_labeled['labels'].values)

# Tokenize the unlabeled data
encoded_data_unlabeled = tokenizer.batch_encode_plus(
    df_unlabeled[tweets_column].tolist(),
    add_special_tokens=True,
    return_attention_mask=True,
    pad_to_max_length=True,
    max_length=256,
    return_tensors='pt'
)

input_ids_unlabeled = encoded_data_unlabeled['input_ids']
attention_masks_unlabeled = encoded_data_unlabeled['attention_mask']

# Tokenize the test data
encoded_data_test = tokenizer.batch_encode_plus(
    df_test[tweets_column].tolist(),
    add_special_tokens=True,
    return_attention_mask=True,
    pad_to_max_length=True,
    max_length=256,
    return_tensors='pt'
)

input_ids_test = encoded_data_test['input_ids']
attention_masks_test = encoded_data_test['attention_mask']
labels_test = torch.tensor(df_test['labels'].values)

dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_unlabeled = TensorDataset(input_ids_unlabeled, attention_masks_unlabeled)
dataset_test = TensorDataset(input_ids_test, attention_masks_test, labels_test)

# Define the XLM-RoBERTa model for sequence classification
student_model = AutoModelForSequenceClassification.from_pretrained('xlm-roberta-base', num_labels=NUM_LABELS)
teacher_model = AutoModelForSequenceClassification.from_pretrained('xlm-roberta-base', num_labels=NUM_LABELS)

# Set up the device for training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
student_model.to(device)
teacher_model.to(device)

# Copy the student model parameters to the teacher model
teacher_model.load_state_dict(student_model.state_dict())

# Set up the optimizer and scheduler
optimizer = AdamW(student_model.parameters(), lr=1e-5, eps=1e-8)
epochs = 5
batch_size = 4
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(dataset_train) * epochs)

# Define accuracy and F1 score functions
def compute_metrics(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    accuracy = accuracy_score(labels_flat, preds_flat)
    f1 = f1_score(labels_flat, preds_flat, average='weighted')
    precision = precision_score(labels_flat, preds_flat, average='weighted')
    recall = recall_score(labels_flat, preds_flat, average='weighted')
    return accuracy, f1, precision, recall

# Training loop with Mean Teacher logic
alpha = 0.999  # EMA decay rate

for epoch in range(1, epochs + 1):
    student_model.train()
    loss_train_total = 0
    progress_bar = tqdm(DataLoader(dataset_train, sampler=RandomSampler(dataset_train), batch_size=batch_size), desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
    for batch in progress_bar:
        student_model.zero_grad()
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}

        outputs_student = student_model(**inputs)
        loss = outputs_student.loss
        logits_student = outputs_student.logits

        loss_train_total += loss.item()

        # Forward pass on unlabeled data for consistency loss
        unlabeled_batch = next(iter(DataLoader(dataset_unlabeled, sampler=RandomSampler(dataset_unlabeled), batch_size=batch_size)))
        unlabeled_batch = tuple(b.to(device) for b in unlabeled_batch)
        with torch.no_grad():
            outputs_teacher = teacher_model(input_ids=unlabeled_batch[0], attention_mask=unlabeled_batch[1])
            logits_teacher = outputs_teacher.logits

        # Consistency loss between student and teacher logits on unlabeled data
        consistency_loss = F.mse_loss(logits_student, logits_teacher)
        total_loss = loss + consistency_loss
        total_loss.backward()

        torch.nn.utils.clip_grad_norm_(student_model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

        # Update teacher model with EMA
        for teacher_param, student_param in zip(teacher_model.parameters(), student_model.parameters()):
            teacher_param.data = alpha * teacher_param.data + (1.0 - alpha) * student_param.data

        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(total_loss.item() / len(batch))})

    loss_train_avg = loss_train_total / len(dataset_train)
    tqdm.write(f'\nEpoch {epoch}')
    tqdm.write(f'Training loss: {loss_train_avg}')

    # Evaluation on validation data
    student_model.eval()
    teacher_model.eval()
    loss_val_total = 0
    predictions_student, true_vals = [], []
    predictions_teacher = []

    for batch in tqdm(DataLoader(dataset_test, sampler=SequentialSampler(dataset_test), batch_size=batch_size), desc='Evaluating', leave=False, disable=False):
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}
        with torch.no_grad():
            outputs_student = student_model(**inputs)
            outputs_teacher = teacher_model(**inputs)
            logits_student = outputs_student.logits
            logits_teacher = outputs_teacher.logits

        loss = outputs_student.loss
        loss_val_total += loss.item()

        logits_student = logits_student.detach().cpu().numpy()
        logits_teacher = logits_teacher.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()

        predictions_student.append(logits_student)
        predictions_teacher.append(logits_teacher)
        true_vals.append(label_ids)

    loss_val_avg = loss_val_total / len(dataset_test)

    predictions_student = np.concatenate(predictions_student, axis=0)
    predictions_teacher = np.concatenate(predictions_teacher, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)

    val_accuracy_student, val_f1_student, val_precision_student, val_recall_student = compute_metrics(predictions_student, true_vals)
    val_accuracy_teacher, val_f1_teacher, val_precision_teacher, val_recall_teacher = compute_metrics(predictions_teacher, true_vals)

    tqdm.write(f'Validation loss: {loss_val_avg}')
    tqdm.write(f'Student Model - Accuracy: {val_accuracy_student}, F1 Score: {val_f1_student}, Precision: {val_precision_student}, Recall: {val_recall_student}')
    tqdm.write(f'Teacher Model - Accuracy: {val_accuracy_teacher}, F1 Score: {val_f1_teacher}, Precision: {val_precision_teacher}, Recall: {val_recall_teacher}')

# Final model selection based on validation performance
final_model = teacher_model

# Evaluation on test data
dataloader_test = DataLoader(dataset_test, sampler=RandomSampler(dataset_test), batch_size=batch_size)

def evaluate_test(model, dataloader):
    model.eval()
    loss_test_total = 0
    predictions, true_vals = [], []

    for batch in tqdm(dataloader, desc='Testing', leave=False, disable=False):
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}
        with torch.no_grad():
            outputs = model(**inputs)
        loss = outputs[0]
        logits = outputs[1]
        loss_test_total += loss.item()
        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)

    loss_test_avg = loss_test_total / len(dataloader)

    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)

    return loss_test_avg, predictions, true_vals

test_loss, test_predictions, test_true_vals = evaluate_test(final_model, dataloader_test)

# Calculate evaluation metrics on test data
test_accuracy, test_f1, test_precision, test_recall = compute_metrics(test_predictions, test_true_vals)

# Print out the evaluation metrics on test data
print(f'Testing Accuracy: {test_accuracy}')
print(f'Testing F1 Score: {test_f1}')
print(f'Testing Precision: {test_precision}')
print(f'Testing Recall: {test_recall}')




Saving Arabic_Depression_10.000_Tweets.xlsx to Arabic_Depression_10.000_Tweets (1).xlsx


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
Yo


Epoch 1
Training loss: 0.16482150811702012


  _warn_prf(average, modifier, msg_start, len(result))


Validation loss: 0.14528782192617654
Student Model - Accuracy: 0.86125, F1 Score: 0.8612810078155401, Precision: 0.8617668547928424, Recall: 0.86125
Teacher Model - Accuracy: 0.515, F1 Score: 0.35013201320132015, Precision: 0.265225, Recall: 0.515





Epoch 2
Training loss: 0.12618363283574582


  _warn_prf(average, modifier, msg_start, len(result))


Validation loss: 0.11776701040565968
Student Model - Accuracy: 0.93125, F1 Score: 0.9312086347575089, Precision: 0.9347437778655681, Recall: 0.93125
Teacher Model - Accuracy: 0.515, F1 Score: 0.35013201320132015, Precision: 0.265225, Recall: 0.515





Epoch 3
Training loss: 0.11636537153273821




Validation loss: 0.12119072675704956
Student Model - Accuracy: 0.91125, F1 Score: 0.9104377300938649, Precision: 0.9220781652614705, Recall: 0.91125
Teacher Model - Accuracy: 0.53125, F1 Score: 0.3853746576012014, Precision: 0.7546060991105463, Recall: 0.53125





Epoch 4
Training loss: 0.11269780553877354




Validation loss: 0.10611998818814755
Student Model - Accuracy: 0.94125, F1 Score: 0.9410230476187482, Precision: 0.9453392651795756, Recall: 0.94125
Teacher Model - Accuracy: 0.66375, F1 Score: 0.617473848968245, Precision: 0.7890045153775023, Recall: 0.66375





Epoch 5
Training loss: 0.10980787966400385




Validation loss: 0.10656859517097474
Student Model - Accuracy: 0.96625, F1 Score: 0.9662562782256363, Precision: 0.966410532895765, Recall: 0.96625
Teacher Model - Accuracy: 0.865, F1 Score: 0.8626082456442987, Precision: 0.8856559382646338, Recall: 0.865


                                                          

Testing Accuracy: 0.865
Testing F1 Score: 0.8626082456442987
Testing Precision: 0.8856559382646338
Testing Recall: 0.865




In [4]:
for epoch in range(1, epochs + 1):
    student_model.train()
    loss_train_total = 0
    progress_bar = tqdm(DataLoader(dataset_train, sampler=RandomSampler(dataset_train), batch_size=batch_size), desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
    for batch in progress_bar:
        student_model.zero_grad()
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}

        outputs_student = student_model(**inputs)
        loss = outputs_student.loss
        logits_student = outputs_student.logits

        loss_train_total += loss.item()

        # Forward pass on unlabeled data for consistency loss
        unlabeled_batch = next(iter(DataLoader(dataset_unlabeled, sampler=RandomSampler(dataset_unlabeled), batch_size=batch_size)))
        unlabeled_batch = tuple(b.to(device) for b in unlabeled_batch)
        with torch.no_grad():
            outputs_teacher = teacher_model(input_ids=unlabeled_batch[0], attention_mask=unlabeled_batch[1])
            logits_teacher = outputs_teacher.logits

        # Consistency loss between student and teacher logits on unlabeled data
        consistency_loss = F.mse_loss(logits_student, logits_teacher)
        total_loss = loss + consistency_loss
        total_loss.backward()

        torch.nn.utils.clip_grad_norm_(student_model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

        # Update teacher model with EMA
        for teacher_param, student_param in zip(teacher_model.parameters(), student_model.parameters()):
            teacher_param.data = alpha * teacher_param.data + (1.0 - alpha) * student_param.data

        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(total_loss.item() / len(batch))})

    loss_train_avg = loss_train_total / len(dataset_train)
    tqdm.write(f'\nEpoch {epoch+5}')
    tqdm.write(f'Training loss: {loss_train_avg}')

    # Evaluation on validation data
    student_model.eval()
    teacher_model.eval()
    loss_val_total = 0
    predictions_student, true_vals = [], []
    predictions_teacher = []

    for batch in tqdm(DataLoader(dataset_test, sampler=SequentialSampler(dataset_test), batch_size=batch_size), desc='Evaluating', leave=False, disable=False):
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}
        with torch.no_grad():
            outputs_student = student_model(**inputs)
            outputs_teacher = teacher_model(**inputs)
            logits_student = outputs_student.logits
            logits_teacher = outputs_teacher.logits

        loss = outputs_student.loss
        loss_val_total += loss.item()

        logits_student = logits_student.detach().cpu().numpy()
        logits_teacher = logits_teacher.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()

        predictions_student.append(logits_student)
        predictions_teacher.append(logits_teacher)
        true_vals.append(label_ids)

    loss_val_avg = loss_val_total / len(dataset_test)

    predictions_student = np.concatenate(predictions_student, axis=0)
    predictions_teacher = np.concatenate(predictions_teacher, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)

    val_accuracy_student, val_f1_student, val_precision_student, val_recall_student = compute_metrics(predictions_student, true_vals)
    val_accuracy_teacher, val_f1_teacher, val_precision_teacher, val_recall_teacher = compute_metrics(predictions_teacher, true_vals)

    tqdm.write(f'Validation loss: {loss_val_avg}')
    tqdm.write(f'Student Model - Accuracy: {val_accuracy_student}, F1 Score: {val_f1_student}, Precision: {val_precision_student}, Recall: {val_recall_student}')
    tqdm.write(f'Teacher Model - Accuracy: {val_accuracy_teacher}, F1 Score: {val_f1_teacher}, Precision: {val_precision_teacher}, Recall: {val_recall_teacher}')





Epoch 6
Training loss: 0.1089203304424882




Validation loss: 0.1018384636193514
Student Model - Accuracy: 0.9625, F1 Score: 0.9625046904315198, Precision: 0.9625575230092036, Recall: 0.9625
Teacher Model - Accuracy: 0.9425, F1 Score: 0.9423827450980391, Precision: 0.9442734428214637, Recall: 0.9425





Epoch 7
Training loss: 0.10962580349296332




Validation loss: 0.10354109760373831
Student Model - Accuracy: 0.97625, F1 Score: 0.9762446120605358, Precision: 0.9763156263500948, Recall: 0.97625
Teacher Model - Accuracy: 0.955, F1 Score: 0.9549466968331464, Precision: 0.9558654934003772, Recall: 0.955





Epoch 8
Training loss: 0.10973715260624886




Validation loss: 0.11271665539592504
Student Model - Accuracy: 0.92125, F1 Score: 0.9206760082659892, Precision: 0.9297193682000013, Recall: 0.92125
Teacher Model - Accuracy: 0.95875, F1 Score: 0.9586971892232402, Precision: 0.9597321726117494, Recall: 0.95875





Epoch 9
Training loss: 0.1089735259115696




Validation loss: 0.10719700276851654
Student Model - Accuracy: 0.96125, F1 Score: 0.9612538145067666, Precision: 0.9625830264071186, Recall: 0.96125
Teacher Model - Accuracy: 0.96875, F1 Score: 0.9687311651207389, Precision: 0.969079801343191, Recall: 0.96875





Epoch 10
Training loss: 0.10479345593601465


                                                             

Validation loss: 0.10419661659747362
Student Model - Accuracy: 0.965, F1 Score: 0.964951728980165, Precision: 0.966117149758454, Recall: 0.965
Teacher Model - Accuracy: 0.97125, F1 Score: 0.9712366354625291, Precision: 0.9714697698030196, Recall: 0.97125




In [5]:
# Final model selection based on validation performance
final_model = teacher_model

# Evaluation on test data
dataloader_test = DataLoader(dataset_test, sampler=RandomSampler(dataset_test), batch_size=batch_size)

def evaluate_test(model, dataloader):
    model.eval()
    loss_test_total = 0
    predictions, true_vals = [], []

    for batch in tqdm(dataloader, desc='Testing', leave=False, disable=False):
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}
        with torch.no_grad():
            outputs = model(**inputs)
        loss = outputs[0]
        logits = outputs[1]
        loss_test_total += loss.item()
        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)

    loss_test_avg = loss_test_total / len(dataloader)

    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)

    return loss_test_avg, predictions, true_vals

test_loss, test_predictions, test_true_vals = evaluate_test(final_model, dataloader_test)

# Calculate evaluation metrics on test data
test_accuracy, test_f1, test_precision, test_recall = compute_metrics(test_predictions, test_true_vals)

# Print out the evaluation metrics on test data
print(f'Testing Accuracy: {test_accuracy}')
print(f'Testing F1 Score: {test_f1}')
print(f'Testing Precision: {test_precision}')
print(f'Testing Recall: {test_recall}')

                                                          

Testing Accuracy: 0.97125
Testing F1 Score: 0.9712366354625291
Testing Precision: 0.9714697698030196
Testing Recall: 0.97125


