<a href="https://colab.research.google.com/github/vnavya2004/BTP/blob/main/Supervised_GraphSAGE_BANGLA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch

!pip uninstall torch-scatter torch-sparse torch-geometric torch-cluster  --y
!pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install torch-cluster -f https://data.pyg.org/whl/torch-{torch.__version__}.html
!pip install git+https://github.com/pyg-team/pytorch_geometric.git

Found existing installation: torch_scatter 2.1.2+pt24cu121
Uninstalling torch_scatter-2.1.2+pt24cu121:
  Successfully uninstalled torch_scatter-2.1.2+pt24cu121
Found existing installation: torch_sparse 0.6.18+pt24cu121
Uninstalling torch_sparse-0.6.18+pt24cu121:
  Successfully uninstalled torch_sparse-0.6.18+pt24cu121
Found existing installation: torch-geometric 2.6.0
Uninstalling torch-geometric-2.6.0:
  Successfully uninstalled torch-geometric-2.6.0
Found existing installation: torch_cluster 1.6.3+pt24cu121
Uninstalling torch_cluster-1.6.3+pt24cu121:
  Successfully uninstalled torch_cluster-1.6.3+pt24cu121
Looking in links: https://data.pyg.org/whl/torch-2.4.0+cu121.html
Collecting torch-scatter
  Using cached https://data.pyg.org/whl/torch-2.4.0%2Bcu121/torch_scatter-2.1.2%2Bpt24cu121-cp310-cp310-linux_x86_64.whl (10.9 MB)
Installing collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt24cu121
Looking in links: https://data.pyg.org/whl/torch-2.4.0+cu121.htm

In [6]:
# Step 1: Install Required Libraries
!pip install networkx

# Step 2: Import Libraries
import torch
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data, DataLoader
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from transformers import AutoTokenizer, AutoModel
from sklearn.model_selection import train_test_split
from torch_geometric.utils import from_networkx
import pandas as pd
import networkx as nx
from google.colab import files

# Step 3: Upload Dataset
uploaded = files.upload()  # Upload your CSV file (e.g., dataset.csv)





Saving Bangla.xlsx to Bangla.xlsx


In [7]:
# Load the dataset
df = pd.read_excel(pd.ExcelFile(list(uploaded.keys())[0]), header=0)# Expecting two columns: 'text' and 'label' where label is 0 or 1
df = df.sample(frac=0.3, random_state=42)

df = df[['tweets', 'labels']]

# Step 4: Preprocess Text Data and Create Embeddings
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')
model = AutoModel.from_pretrained('xlm-roberta-base')

def get_embeddings(texts):
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**inputs)
    # Use the mean of last hidden states as the sentence embeddings
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings





In [8]:
# Generate embeddings for all texts
embeddings = get_embeddings(df['tweets'].tolist())
labels = torch.tensor(df['labels'].values, dtype=torch.long)

In [9]:
from sklearn.metrics.pairwise import cosine_similarity
G = nx.Graph()
# Add nodes with features
for i in range(len(df)):
    G.add_node(i, x=embeddings[i])

# Step 6: Create Edges Using Cosine Similarity
# Compute pairwise cosine similarity
similarity_matrix = cosine_similarity(embeddings)

# Define a threshold for creating edges
threshold = 0.8  # You can adjust this value

# Add edges based on similarity
for i in range(len(similarity_matrix)):
    for j in range(i + 1, len(similarity_matrix)):
        if similarity_matrix[i, j] > threshold:
            G.add_edge(i, j)

# Convert NetworkX graph to PyTorch Geometric Data
data = from_networkx(G)
data.x = torch.stack([data.x[i] for i in range(len(G.nodes))])
data.y = labels


KeyError: 'label'

In [10]:

# Step 7: Split Data into Train and Test (80-20 split)
train_mask, test_mask = train_test_split(range(len(df)), test_size=0.2, stratify=df['labels'])

data.train_mask = torch.tensor(train_mask, dtype=torch.long)
data.test_mask = torch.tensor(test_mask, dtype=torch.long)

# Step 8: Define GraphSAGE Model
class GraphSAGE(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Initialize the model
model = GraphSAGE(in_channels=embeddings.size(1), hidden_channels=128, out_channels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Step 9: Train the Model
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

# Step 10: Test the Model
def test():
    model.eval()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    test_pred = pred[data.test_mask]
    test_true = data.y[data.test_mask]
    acc = accuracy_score(test_true.cpu(), test_pred.cpu())
    f1 = f1_score(test_true.cpu(), test_pred.cpu())
    precision = precision_score(test_true.cpu(), test_pred.cpu())
    recall = recall_score(test_true.cpu(), test_pred.cpu())
    return acc, f1, precision, recall

# Train for a number of epochs
for epoch in range(1, 101):
    loss = train()
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss:.4f}')

# Evaluate the model
acc, f1, precision, recall = test()
print(f'Accuracy: {acc:.4f}, F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}')

Epoch 10, Loss: 0.6923
Epoch 20, Loss: 0.6925
Epoch 30, Loss: 0.6925
Epoch 40, Loss: 0.6924
Epoch 50, Loss: 0.6923
Epoch 60, Loss: 0.6923
Epoch 70, Loss: 0.6922
Epoch 80, Loss: 0.6923
Epoch 90, Loss: 0.6922
Epoch 100, Loss: 0.6922
Accuracy: 0.5169, F1 Score: 0.6816, Precision: 0.5169, Recall: 1.0000
