In [30]:
!pip install transformers torch torch-geometric networkx scikit-learn pandas numpy



In [31]:
from transformers import BertTokenizer, BertModel
import torch

# Pull the pretrained 'bert-base-uncased' checkpoint: the encoder weights and its matching tokenizer
bert_model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path='bert-base-uncased')

# Function to encode text using BERT
def encode_text(text):
    """Encode text into one embedding vector per input using the module-level BERT model.

    Args:
        text: A string or a list of strings; handed unchanged to the module-level
            ``tokenizer`` (padded to the longest input, truncated to 512 tokens).

    Returns:
        A 2-D tensor with one row per input text: the mean of that text's
        last-layer token vectors, with padding positions excluded from the mean.
    """
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    outputs = bert_model(**inputs)
    token_states = outputs.last_hidden_state  # assumed [batch, seq_len, hidden], as in Hugging Face BERT

    # Weight every token by its attention-mask entry so that padding added for
    # shorter texts in a batch does not dilute their embedding. For a single
    # text the mask is all ones and this equals a plain mean over tokens.
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_states.dtype)
    summed = (token_states * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1)  # guard against division by zero
    return summed / counts



In [32]:
import torch_geometric
from torch_geometric.nn import SAGEConv
import torch.nn.functional as F

class PropagationEncoder(torch.nn.Module):
    """Stack of SAGEConv layers that condenses a propagation graph into one vector.

    Args:
        in_channels: Width of the input node features.
        hidden_channels: Output width of every SAGEConv layer.
        num_layers: Total number of SAGEConv layers (at least one is always built).
    """

    def __init__(self, in_channels, hidden_channels, num_layers=2):
        super().__init__()
        # The first layer consumes the raw node features; each of the remaining
        # (num_layers - 1) layers consumes the previous layer's hidden output.
        input_widths = [in_channels] + [hidden_channels] * (num_layers - 1)
        self.convs = torch.nn.ModuleList(
            [SAGEConv(width, hidden_channels) for width in input_widths]
        )

    def forward(self, x, edge_index):
        """Run every layer (each followed by ReLU) and average the result over dim 0."""
        node_states = x
        for layer in self.convs:
            node_states = F.relu(layer(node_states, edge_index))
        # Averaging over the node axis yields the graph-level representation.
        return node_states.mean(dim=0)

In [33]:
import networkx as nx
from torch_geometric.utils import from_networkx

# Toy propagation network: five nodes joined by four undirected edges
G = nx.Graph()
G.add_edges_from([(0, 1), (1, 2), (1, 3), (3, 4)])

# Attach a one-element feature list to every node (here simply the node index as a float;
# a real dataset would carry e.g. follower or retweet counts)
nx.set_node_attributes(G, {node: [1.0 * node] for node in G.nodes}, name='feature')

# Convert the graph to a torch_geometric Data object
data = from_networkx(G)

# Node-feature matrix with one row per node, in G.nodes iteration order
x = torch.tensor([feature for _, feature in G.nodes(data='feature')], dtype=torch.float)

In [34]:
class MEFaND(torch.nn.Module):
    """Classifier that fuses a BERT text embedding with a propagation-graph embedding.

    The text is encoded by 'bert-base-uncased' (mask-aware mean of the last-layer
    token vectors), the graph by a ``PropagationEncoder``; both vectors are
    concatenated and mapped to a single logit by a linear layer.

    Args:
        text_hidden_size: Width of the text embedding; must equal the hidden size
            of the BERT checkpoint.
        graph_hidden_size: Width of the graph embedding produced by the graph encoder.
        graph_in_channels: Width of the input node features (defaults to 1, the
            value that was previously hard-coded).

    Raises:
        ValueError: If ``text_hidden_size`` does not match the BERT hidden size.
    """

    def __init__(self, text_hidden_size, graph_hidden_size, graph_in_channels=1):
        super(MEFaND, self).__init__()
        # Keep the tokenizer on the module so forward() does not rely on a notebook-global variable.
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.text_encoder = BertModel.from_pretrained('bert-base-uncased')

        # Fail early with a clear message instead of a shape error inside the linear layer later on.
        bert_hidden_size = self.text_encoder.config.hidden_size
        if text_hidden_size != bert_hidden_size:
            raise ValueError(
                f"text_hidden_size={text_hidden_size} does not match the BERT hidden size {bert_hidden_size}"
            )

        self.graph_encoder = PropagationEncoder(in_channels=graph_in_channels, hidden_channels=graph_hidden_size)
        self.fc = torch.nn.Linear(text_hidden_size + graph_hidden_size, 1)

    def forward(self, text, graph_x, edge_index):
        """Return one raw logit per input text (apply a sigmoid to get a probability).

        Args:
            text: A string or a list of strings.
            graph_x: Node-feature tensor passed to the graph encoder.
            edge_index: Edge-index tensor passed to the graph encoder.

        Returns:
            Tensor with one row per text and a single column (shape [1, 1] for a single string).
        """
        # The tokenizer produces CPU tensors; move every input to wherever the model's weights live.
        device = self.fc.weight.device

        # Textual representation
        text_inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
        text_inputs = {key: value.to(device) for key, value in text_inputs.items()}
        token_states = self.text_encoder(**text_inputs).last_hidden_state
        # Mean over real tokens only: padding positions have mask 0 and are ignored.
        # For a single text the mask is all ones, so this equals a plain mean.
        mask = text_inputs['attention_mask'].unsqueeze(-1).to(token_states.dtype)
        text_embedding = (token_states * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)

        # Graph propagation representation (a single vector for the whole graph)
        graph_embedding = self.graph_encoder(graph_x.to(device), edge_index.to(device))

        # Repeat the graph vector for every text in the batch so the concatenation
        # also works when more than one text is passed.
        graph_embedding = graph_embedding.unsqueeze(0).expand(text_embedding.size(0), -1)
        combined = torch.cat([text_embedding, graph_embedding], dim=1)

        # Raw logits, deliberately not squeezed so the shape matches a [batch, 1] label tensor
        return self.fc(combined)


In [39]:
import torch.optim as optim

# Seed torch's RNG so the randomly initialised layers (graph encoder, classifier head)
# start from the same weights on every run
SEED = 42
torch.manual_seed(SEED)

# Create the MEFaND model
model = MEFaND(text_hidden_size=768, graph_hidden_size=128)

# Loss and optimizer (BCEWithLogitsLoss applies the sigmoid itself, so the model outputs raw logits)
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

# Sample data (for demonstration purposes)
sample_text = "India vs Bangladesh Highlights, 3rd T20I: India Rout Bangladesh By 133 Runs, Complete 2-1 Clean Sweep"
sample_graph_x = x  # Node features from earlier
sample_edge_index = data.edge_index  # Edge index from earlier
sample_label = torch.tensor([1], dtype=torch.float).unsqueeze(0)  # Shape [1, 1]

# from_pretrained() hands BERT back in eval mode (dropout disabled);
# switch the whole model to training mode before optimising.
model.train()

# Training loop
for epoch in range(100):
    optimizer.zero_grad()
    output = model(sample_text, sample_graph_x, sample_edge_index)  # Model output shape will be [1, 1]
    loss = criterion(output, sample_label)  # Both output and sample_label should be [1, 1]
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

# Back to eval mode so that any inference run after this cell is deterministic
model.eval()

Epoch 0, Loss: 0.5282337069511414
Epoch 10, Loss: 0.03861550614237785
Epoch 20, Loss: 0.008564172312617302
Epoch 30, Loss: 0.0038216114044189453
Epoch 40, Loss: 0.0023992876522243023
Epoch 50, Loss: 0.0017736910376697779
Epoch 60, Loss: 0.0014257751172408462
Epoch 70, Loss: 0.0012036559637635946
Epoch 80, Loss: 0.0010418938472867012
Epoch 90, Loss: 0.0009170982521027327


In [42]:
# Test on a new sample
test_text = "India vs Bangladesh Highlights, 3rd T20I: India Rout Bangladesh By 133 Runs, Complete 3-0 Clean Sweep"
test_graph_x = x
test_edge_index = data.edge_index

# Inference: make sure dropout is disabled and skip building the autograd graph
model.eval()
with torch.no_grad():
    test_output = model(test_text, test_graph_x, test_edge_index)

# NOTE: the printed value is the raw logit, not a probability; pass it through torch.sigmoid to get one
print(f"Prediction: {test_output.item()}")

Prediction: 7.1115593910217285


In [44]:
import torch

# The model returns a raw logit. Run it in eval mode and without gradient tracking,
# using the test sample's own graph inputs (not the training sample's).
model.eval()
with torch.no_grad():
    logit_output = model(test_text, test_graph_x, test_edge_index)

# Apply sigmoid to convert logit to probability
probability = torch.sigmoid(logit_output)

# Convert probability to a binary prediction (1: fake, 0: real)
# NOTE(review): this label convention comes from the original comment; confirm it against the training labels.
prediction = (probability > 0.5).float()  # Threshold at 0.5

print(f"Probability: {probability.item()}, Prediction: {prediction.item()}")

Probability: 0.9991850256919861, Prediction: 1.0
