In [6]:
import torch
from torch_geometric.data import Data
import torch_geometric.nn as pyg_nn
import torch.nn.functional as F

In [7]:
import data_loader
from torch_geometric.data import InMemoryDataset, Data

train_data1, test_data1 = data_loader.load_data_object("../key_presses1.csv", test_split=0.8)
train_data2, test_data2  = data_loader.load_data_object("../key_presses2.csv", test_split=0.8)
num_features = test_data1.x.shape[1]

# # Move data to the specified device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class SimpleGraphDataset(InMemoryDataset):
    def __init__(self, data_list, num_features, num_classes):
        super(SimpleGraphDataset, self).__init__('.', None, None, None)
        self.data, self.slices = self.collate(data_list)  # Collate all data objects
        # self.num_classes_ = num_classes
        # self.num_features_ = num_features

    def __len__(self):
        return len(self.data.y)  # Number of graphs in the dataset


# split into test and train
data_pos = Data(x=train_data1.x, edge_index=train_data1.edge_index, y=torch.tensor([0])).to(device)
data_neg = Data(x=train_data2.x, edge_index=train_data2.edge_index, y=torch.tensor([1])).to(device)

# Create the dataset
dataset = SimpleGraphDataset([data_pos, data_neg], num_features=num_features, num_classes=2)

# split into test and train
test_data_pos = Data(x=test_data1.x, edge_index=test_data1.edge_index, y=torch.tensor([0])).to(device)
test_data_neg = Data(x=test_data2.x, edge_index=test_data2.edge_index, y=torch.tensor([1])).to(device)

# Create the dataset
test_dataset = SimpleGraphDataset([data_pos, data_neg], num_features=num_features, num_classes=2)



print(data_pos.x.device)
print(data_pos.y.device)
print(data_pos.edge_index.device)
print(data_neg.x.device)
print(data_neg.y.device)
print(data_neg.edge_index.device)



# Check the dataset
print("Number of samples in the dataset:", len(dataset))
print("Sample 1 (Positive Label):", dataset[0])
print("Sample 2 (Negative Label):", dataset[1])

cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
Number of samples in the dataset: 2
Sample 1 (Positive Label): Data(x=[24, 6], edge_index=[2, 92], y=[1])
Sample 2 (Negative Label): Data(x=[23, 6], edge_index=[2, 61], y=[1])




In [8]:

# Define a simple GCN model
class LetterGNN(torch.nn.Module):
    def __init__(self, num_node_features, hidden_dim, num_classes, num_layers=2):
        super(LetterGNN, self).__init__()

        self.convs = torch.nn.ModuleList()
        self.convs.append(pyg_nn.GCNConv(num_node_features, hidden_dim))
        for _ in range(num_layers - 1):
            self.convs.append(pyg_nn.GCNConv(hidden_dim, hidden_dim))

        self.fc = torch.nn.Linear(hidden_dim, num_classes)

    def forward(self, x, edge_index, batch):
        for conv in self.convs:
            x = x.float()
            edge_index = edge_index.long()
            x = conv.forward(x, edge_index)
            x = F.relu(x)
        
        # idk maybe use a different pool method 
        x = pyg_nn.global_mean_pool(x, batch)

        # classify
        x = self.fc(x)

        return x


In [9]:
# Train the model 

from torch_geometric.loader import DataLoader
# Q: How Do we choose hidden dims size ?? !!!!!!!!!!

# Define the model, loss, and optimizer
model = LetterGNN(num_node_features=dataset.num_node_features, hidden_dim=64, num_classes=dataset.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

# Train loop
def train(model, data_loader):
    model.train()
    total_loss = 0
    for data in data_loader:
        optimizer.zero_grad()
        output = model(data.x, data.edge_index, data.batch)  # Forward pass
        loss = criterion(output, data.y)  # Compute the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update the model parameters
        total_loss += loss.item()
    return total_loss / len(data_loader)


# Training over epochs
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)
for epoch in range(1, 201):
    loss = train(model, data_loader)
    print(f"Epoch: {epoch:03d}, Loss: {loss:.4f}")



Epoch: 001, Loss: 0.5201
Epoch: 002, Loss: 34.9376
Epoch: 003, Loss: 27.9130
Epoch: 004, Loss: 13.2839
Epoch: 005, Loss: 1.5461
Epoch: 006, Loss: 1.2546
Epoch: 007, Loss: 2.3999
Epoch: 008, Loss: 2.7903
Epoch: 009, Loss: 0.4897
Epoch: 010, Loss: 3.2877
Epoch: 011, Loss: 0.6448
Epoch: 012, Loss: 1.2041
Epoch: 013, Loss: 2.3545
Epoch: 014, Loss: 1.6498
Epoch: 015, Loss: 0.1358
Epoch: 016, Loss: 0.0858
Epoch: 017, Loss: 1.7976
Epoch: 018, Loss: 0.1307
Epoch: 019, Loss: 0.0063
Epoch: 020, Loss: 0.0723
Epoch: 021, Loss: 0.2827
Epoch: 022, Loss: 0.2183
Epoch: 023, Loss: 0.0359
Epoch: 024, Loss: 0.0026
Epoch: 025, Loss: 0.0002
Epoch: 026, Loss: 0.0012
Epoch: 027, Loss: 0.1037
Epoch: 028, Loss: 0.0075
Epoch: 029, Loss: 0.0008
Epoch: 030, Loss: 0.0001
Epoch: 031, Loss: 0.0001
Epoch: 032, Loss: 0.0000
Epoch: 033, Loss: 0.0000
Epoch: 034, Loss: 0.0000
Epoch: 035, Loss: 0.0000
Epoch: 036, Loss: 0.0000
Epoch: 037, Loss: 0.0000
Epoch: 038, Loss: 0.0000
Epoch: 039, Loss: 0.0000
Epoch: 040, Loss: 0.00

In [10]:
# test 
def test(model, data_loader):
    model.eval()
    correct = 0
    for data in data_loader:
        print(data.y)
        output = model(data.x, data.edge_index, data.batch)
        print(output)
        pred = output.argmax(dim=1)  # Get the index of the max log-probability
        correct += (pred == data.y).sum().item()
    return correct / len(data_loader.dataset)

# Test the model
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
accuracy = test(model, test_loader)
print(f"Test Accuracy: {accuracy:.4f}")

tensor([0], device='cuda:0')
tensor([[ -3.3653, -20.8411]], device='cuda:0', grad_fn=<AddmmBackward0>)
tensor([1], device='cuda:0')
tensor([[-16.7095,   1.3554]], device='cuda:0', grad_fn=<AddmmBackward0>)
Test Accuracy: 1.0000
