<a href="https://colab.research.google.com/github/vnavya2004/BTP/blob/main/GNN_TRIED_FOR_ARABIC_part2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
import pandas as pd
import numpy as np
from tqdm import tqdm
from google.colab import files

# Load dataset
uploaded = files.upload()
df = pd.read_excel(pd.ExcelFile(list(uploaded.keys())[0]), header=0)
df = df.sample(frac=0.4, random_state=42)
# Graph Preparation
tweets_column = 'tweet'
labels_column = 'label'
NUM_LABELS = len(df[labels_column].unique())
possible_labels = df[labels_column].unique()
label_dict = {possible_label: index for index, possible_label in enumerate(possible_labels)}
df['labels'] = df[labels_column].map(label_dict)

# Split the dataset
df_labeled, df_temp = train_test_split(df, stratify=df[labels_column], test_size=0.8)
df_unlabeled, df_test = train_test_split(df_temp, stratify=df_temp[labels_column], test_size=0.25)


In [7]:

# Correct the graph creation function to have proper feature size
def create_graph_data(df, feature_dim=64):
    num_nodes = len(df)

    # Create random features for nodes as an example (Replace with actual features if available)
    x = torch.randn(num_nodes, feature_dim, dtype=torch.float)  # Node features with 64 dimensions
    labels = torch.tensor(df['labels'].values, dtype=torch.long)

    # Sample graph: Replace this with your logic for creating edges between nodes
    edge_index = torch.tensor([[i, j] for i in range(num_nodes) for j in range(num_nodes) if i != j], dtype=torch.long).t().contiguous()
    data = Data(x=x, edge_index=edge_index, y=labels)
    return data

# Create graph data with corrected feature dimensions
graph_data_labeled = create_graph_data(df_labeled, feature_dim=64)
graph_data_unlabeled = create_graph_data(df_unlabeled, feature_dim=64)
graph_data_test = create_graph_data(df_test, feature_dim=64)

# DataLoader remains the same
dataloader_train = DataLoader([graph_data_labeled], batch_size=batch_size, shuffle=True)
dataloader_unlabeled = DataLoader([graph_data_unlabeled], batch_size=batch_size, shuffle=True)
dataloader_test = DataLoader([graph_data_test], batch_size=batch_size, shuffle=False)

# Define GNN Model with correct input dimensions
class GNNModel(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_classes):
        super(GNNModel, self).__init__()
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Initialize the models with the corrected input feature size
student_model = GNNModel(num_node_features=64, hidden_channels=64, num_classes=NUM_LABELS)
teacher_model = GNNModel(num_node_features=64, hidden_channels=64, num_classes=NUM_LABELS)

# Set up the device for training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
student_model.to(device)
teacher_model.to(device)

# Copy student model parameters to the teacher model
teacher_model.load_state_dict(student_model.state_dict())

# Set up optimizer
optimizer = torch.optim.Adam(student_model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
epochs = 10
alpha = 0.999  # EMA decay rate

# Define evaluation metrics
def compute_metrics(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    accuracy = accuracy_score(labels_flat, preds_flat)
    f1 = f1_score(labels_flat, preds_flat, average='weighted')
    precision = precision_score(labels_flat, preds_flat, average='weighted')
    recall = recall_score(labels_flat, preds_flat, average='weighted')
    return accuracy, f1, precision, recall

# Update teacher model using EMA of student model
def update_teacher(student_model, teacher_model, alpha):
    for student_param, teacher_param in zip(student_model.parameters(), teacher_model.parameters()):
        teacher_param.data = alpha * teacher_param.data + (1 - alpha) * student_param.data

# Training loop
for epoch in range(1, epochs + 1):
    student_model.train()
    teacher_model.eval()
    loss_train_total = 0
    progress_bar = tqdm(dataloader_train, desc=f'Epoch {epoch}', leave=False, disable=False)

    # Train the student on labeled data
    for batch in progress_bar:
        student_model.zero_grad()
        batch = batch.to(device)

        # Forward pass through the student model
        logits_student = student_model(batch.x, batch.edge_index)
        loss = F.cross_entropy(logits_student, batch.y)  # Supervised loss on labeled data

        loss_train_total += loss.item()
        loss.backward()

        torch.nn.utils.clip_grad_norm_(student_model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item())})

    # Consistency Loss on Unlabeled Data
    loss_consistency_total = 0
    for batch in DataLoader([graph_data_unlabeled], batch_size=batch_size, shuffle=True):
        student_model.zero_grad()
        batch = batch.to(device)

        # Predictions from student and teacher models
        student_preds = student_model(batch.x, batch.edge_index)
        teacher_preds = teacher_model(batch.x, batch.edge_index)

        # Compute consistency loss (e.g., Mean Squared Error between predictions)
        consistency_loss = F.mse_loss(student_preds, teacher_preds.detach())
        loss_consistency_total += consistency_loss.item()

        # Backward pass and optimization
        consistency_loss.backward()
        torch.nn.utils.clip_grad_norm_(student_model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    # Update the teacher model using EMA
    update_teacher(student_model, teacher_model, alpha)

    loss_train_avg = loss_train_total / len(dataloader_train)
    loss_consistency_avg = loss_consistency_total / len(dataloader_unlabeled)

    tqdm.write(f'\nEpoch {epoch}')
    tqdm.write(f'Supervised Training Loss: {loss_train_avg}')
    tqdm.write(f'Consistency Loss: {loss_consistency_avg}')

# Evaluation on test data
teacher_model.eval()
loss_test_total = 0
predictions, true_vals = [], []

for batch in tqdm(dataloader_test, desc='Testing', leave=False):
    batch = batch.to(device)
    with torch.no_grad():
        outputs = teacher_model(batch.x, batch.edge_index)
    loss = F.cross_entropy(outputs, batch.y)
    loss_test_total += loss.item()

    preds = outputs.detach().cpu().numpy()
    labels = batch.y.cpu().numpy()

    predictions.append(preds)
    true_vals.append(labels)

predictions = np.concatenate(predictions, axis=0)
true_vals = np.concatenate(true_vals, axis=0)

# Calculate metrics
test_accuracy, test_f1, test_precision, test_recall = compute_metrics(predictions, true_vals)
print(f'Testing Accuracy: {test_accuracy}')
print(f'Testing F1 Score: {test_f1}')
print(f'Testing Precision: {test_precision}')
print(f'Testing Recall: {test_recall}')





Epoch 1
Supervised Training Loss: 0.6938855648040771
Consistency Loss: 8.85823192220414e-06





Epoch 2
Supervised Training Loss: 0.6932591795921326
Consistency Loss: 1.8543927353675826e-06





Epoch 3
Supervised Training Loss: 0.6929776668548584
Consistency Loss: 1.3403496268438175e-07





Epoch 4
Supervised Training Loss: 0.6928291916847229
Consistency Loss: 5.5335672044520834e-08





Epoch 5
Supervised Training Loss: 0.6927558183670044
Consistency Loss: 5.429991034588966e-08





Epoch 6
Supervised Training Loss: 0.6927223205566406
Consistency Loss: 4.5242515511745296e-08





Epoch 7
Supervised Training Loss: 0.6927068829536438
Consistency Loss: 8.765574932567688e-08





Epoch 8
Supervised Training Loss: 0.6927003264427185
Consistency Loss: 7.354530850989249e-08





Epoch 9
Supervised Training Loss: 0.692698061466217
Consistency Loss: 4.557443489261459e-08





Epoch 10
Supervised Training Loss: 0.692697286605835
Consistency Loss: 2.765159301532094e-08


                                              

Testing Accuracy: 0.485
Testing F1 Score: 0.3168013468013468
Testing Precision: 0.23522500000000002
Testing Recall: 0.485


  _warn_prf(average, modifier, msg_start, len(result))
