<a href="https://colab.research.google.com/github/vnavya2004/BTP/blob/main/GNN_TRIED_FOR_ARABIC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch

!pip uninstall torch-scatter torch-sparse torch-geometric torch-cluster  --y
!pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install torch-cluster -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install git+https://github.com/pyg-team/pytorch_geometric.git
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
import pandas as pd
import numpy as np
from tqdm import tqdm
from google.colab import files

# Load dataset
uploaded = files.upload()
df = pd.read_excel(pd.ExcelFile(list(uploaded.keys())[0]), header=0)

# Graph Preparation
tweets_column = 'tweet'
labels_column = 'label'
NUM_LABELS = len(df[labels_column].unique())
possible_labels = df[labels_column].unique()
label_dict = {possible_label: index for index, possible_label in enumerate(possible_labels)}
df['labels'] = df[labels_column].map(label_dict)

# Split the dataset
df_labeled, df_temp = train_test_split(df, stratify=df[labels_column], test_size=0.6)
df_unlabeled, df_test = train_test_split(df_temp, stratify=df_temp[labels_column], test_size=0.2)

# Convert to graph data
# Note: This is a placeholder for graph creation. You'll need a proper method to create edges based on your data (e.g., similarity).
def create_graph_data(df):
    num_nodes = len(df)
    x = torch.eye(num_nodes, dtype=torch.float)  # One-hot encoding as node features (placeholder)
    labels = torch.tensor(df['labels'].values, dtype=torch.long)
    # Sample graph: Replace this with your logic for creating edges between nodes
    edge_index = torch.tensor([[i, j] for i in range(num_nodes) for j in range(num_nodes) if i != j], dtype=torch.long).t().contiguous()
    data = Data(x=x, edge_index=edge_index, y=labels)
    return data

# Create graph data
graph_data_labeled = create_graph_data(df_labeled)
graph_data_unlabeled = create_graph_data(df_unlabeled)
graph_data_test = create_graph_data(df_test)

# DataLoader
batch_size = 4
dataloader_train = DataLoader([graph_data_labeled], batch_size=batch_size, shuffle=True)
dataloader_unlabeled = DataLoader([graph_data_unlabeled], batch_size=batch_size, shuffle=True)
dataloader_test = DataLoader([graph_data_test], batch_size=batch_size, shuffle=False)

# Define GNN Model
class GNNModel(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_classes):
        super(GNNModel, self).__init__()
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Initialize student and teacher models
student_model = GNNModel(num_node_features=graph_data_labeled.num_node_features, hidden_channels=64, num_classes=NUM_LABELS)
teacher_model = GNNModel(num_node_features=graph_data_labeled.num_node_features, hidden_channels=64, num_classes=NUM_LABELS)

# Set up the device for training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
student_model.to(device)
teacher_model.to(device)

# Copy student model parameters to the teacher model
teacher_model.load_state_dict(student_model.state_dict())

# Set up optimizer
optimizer = torch.optim.Adam(student_model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
epochs = 5
alpha = 0.999  # EMA decay rate

# Define evaluation metrics
def compute_metrics(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    accuracy = accuracy_score(labels_flat, preds_flat)
    f1 = f1_score(labels_flat, preds_flat, average='weighted')
    precision = precision_score(labels_flat, preds_flat, average='weighted')
    recall = recall_score(labels_flat, preds_flat, average='weighted')
    return accuracy, f1, precision, recall

# Training loop
for epoch in range(1, epochs + 1):
    student_model.train()
    loss_train_total = 0
    progress_bar = tqdm(dataloader_train, desc=f'Epoch {epoch}', leave=False, disable=False)

    for batch in progress_bar:
        student_model.zero_grad()
        batch = batch.to(device)

        # Forward pass through the student model
        logits_student = student_model(batch.x, batch.edge_index)
        loss = F.cross_entropy(logits_student, batch.y)

        loss_train_total += loss.item()
        loss.backward()

        torch.nn.utils.clip_grad_norm_(student_model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item())})

    loss_train_avg = loss_train_total / len(dataloader_train)
    tqdm.write(f'\nEpoch {epoch}')
    tqdm.write(f'Training loss: {loss_train_avg}')

# Evaluation on test data
teacher_model.eval()
loss_test_total = 0
predictions, true_vals = [], []

for batch in tqdm(dataloader_test, desc='Testing', leave=False):
    batch = batch.to(device)
    with torch.no_grad():
        outputs = teacher_model(batch.x, batch.edge_index)
    loss = F.cross_entropy(outputs, batch.y)
    loss_test_total += loss.item()

    preds = outputs.detach().cpu().numpy()
    labels = batch.y.cpu().numpy()

    predictions.append(preds)
    true_vals.append(labels)

predictions = np.concatenate(predictions, axis=0)
true_vals = np.concatenate(true_vals, axis=0)

# Calculate metrics
test_accuracy, test_f1, test_precision, test_recall = compute_metrics(predictions, true_vals)
print(f'Testing Accuracy: {test_accuracy}')
print(f'Testing F1 Score: {test_f1}')
print(f'Testing Precision: {test_precision}')
print(f'Testing Recall: {test_recall}')


[0mLooking in links: https://data.pyg.org/whl/torch-2.4.0+cu121.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.4.0%2Bcu121/torch_scatter-2.1.2%2Bpt24cu121-cp310-cp310-linux_x86_64.whl (10.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m42.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt24cu121
Looking in links: https://data.pyg.org/whl/torch-2.4.0+cu121.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.4.0%2Bcu121/torch_sparse-0.6.18%2Bpt24cu121-cp310-cp310-linux_x86_64.whl (5.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.18+pt24cu121
Looking in links: https://data.pyg.org/whl/torch-2.4.0+cu121.html
Collecting torch-cluster
  Downloading https://dat

Saving Arabic_Depression_10.000_Tweets.xlsx to Arabic_Depression_10.000_Tweets.xlsx





Epoch 1
Training loss: 0.6931471824645996





Epoch 2
Training loss: 0.6931555271148682





Epoch 3
Training loss: 0.6931483149528503





Epoch 4
Training loss: 0.6931475400924683





Epoch 5
Training loss: 0.6931476593017578


                                              

Testing Accuracy: 0.5
Testing F1 Score: 0.3333333333333333
Testing Precision: 0.25
Testing Recall: 0.5


  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
# Split the dataset
df_labeled, df_temp = train_test_split(df, stratify=df[labels_column], test_size=0.6)
df_unlabeled, df_test = train_test_split(df_temp, stratify=df_temp[labels_column], test_size=0.2)

# Convert to graph data
# Note: This is a placeholder for graph creation. You'll need a proper method to create edges based on your data (e.g., similarity).
def create_graph_data(df):
    num_nodes = len(df)
    x = torch.eye(num_nodes, dtype=torch.float)  # One-hot encoding as node features (placeholder)
    labels = torch.tensor(df['labels'].values, dtype=torch.long)
    # Sample graph: Replace this with your logic for creating edges between nodes
    edge_index = torch.tensor([[i, j] for i in range(num_nodes) for j in range(num_nodes) if i != j], dtype=torch.long).t().contiguous()
    data = Data(x=x, edge_index=edge_index, y=labels)
    return data

# Create graph data
graph_data_labeled = create_graph_data(df_labeled)
graph_data_unlabeled = create_graph_data(df_unlabeled)
graph_data_test = create_graph_data(df_test)

# DataLoader
batch_size = 4
dataloader_train = DataLoader([graph_data_labeled], batch_size=batch_size, shuffle=True)
dataloader_unlabeled = DataLoader([graph_data_unlabeled], batch_size=batch_size, shuffle=True)
dataloader_test = DataLoader([graph_data_test], batch_size=batch_size, shuffle=False)

# Define GNN Model
class GNNModel(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_classes):
        super(GNNModel, self).__init__()
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Initialize student and teacher models
student_model = GNNModel(num_node_features=graph_data_labeled.num_node_features, hidden_channels=64, num_classes=NUM_LABELS)
teacher_model = GNNModel(num_node_features=graph_data_labeled.num_node_features, hidden_channels=64, num_classes=NUM_LABELS)

# Set up the device for training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
student_model.to(device)
teacher_model.to(device)

# Copy student model parameters to the teacher model
teacher_model.load_state_dict(student_model.state_dict())

# Set up optimizer
optimizer = torch.optim.Adam(student_model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
epochs = 5
alpha = 0.999  # EMA decay rate

# Define evaluation metrics
def compute_metrics(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    accuracy = accuracy_score(labels_flat, preds_flat)
    f1 = f1_score(labels_flat, preds_flat, average='weighted')
    precision = precision_score(labels_flat, preds_flat, average='weighted')
    recall = recall_score(labels_flat, preds_flat, average='weighted')
    return accuracy, f1, precision, recall

# Training loop
for epoch in range(1, epochs + 1):
    student_model.train()
    loss_train_total = 0
    progress_bar = tqdm(dataloader_train, desc=f'Epoch {epoch}', leave=False, disable=False)

    for batch in progress_bar:
        student_model.zero_grad()
        batch = batch.to(device)

        # Forward pass through the student model
        logits_student = student_model(batch.x, batch.edge_index)
        loss = F.cross_entropy(logits_student, batch.y)

        loss_train_total += loss.item()
        loss.backward()

        torch.nn.utils.clip_grad_norm_(student_model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item())})

    loss_train_avg = loss_train_total / len(dataloader_train)
    tqdm.write(f'\nEpoch {epoch}')
    tqdm.write(f'Training loss: {loss_train_avg}')

# Evaluation on test data
teacher_model.eval()
loss_test_total = 0
predictions, true_vals = [], []

for batch in tqdm(dataloader_test, desc='Testing', leave=False):
    batch = batch.to(device)
    with torch.no_grad():
        outputs = teacher_model(batch.x, batch.edge_index)
    loss = F.cross_entropy(outputs, batch.y)
    loss_test_total += loss.item()

    preds = outputs.detach().cpu().numpy()
    labels = batch.y.cpu().numpy()

    predictions.append(preds)
    true_vals.append(labels)

predictions = np.concatenate(predictions, axis=0)
true_vals = np.concatenate(true_vals, axis=0)

# Calculate metrics
test_accuracy, test_f1, test_precision, test_recall = compute_metrics(predictions, true_vals)
print(f'Testing Accuracy: {test_accuracy}')
print(f'Testing F1 Score: {test_f1}')
print(f'Testing Precision: {test_precision}')
print(f'Testing Recall: {test_recall}')





Epoch 1
Training loss: 0.6931473612785339





Epoch 2
Training loss: 0.6931537985801697





Epoch 3
Training loss: 0.6931480765342712





Epoch 4
Training loss: 0.6931471228599548





Epoch 5
Training loss: 0.6931480765342712




RuntimeError: mat1 and mat2 shapes cannot be multiplied (1200x1200 and 4000x64)