In [1]:
from models.gnn import SeismicGNN

In [19]:
import torch.nn.functional as F
import pickle
from torch_geometric.utils import from_networkx
import torch
import numpy as np
from sklearn.model_selection import train_test_split

In [3]:
def train(model, data, optimizer):
    """Run one full-batch optimisation step and report the training loss.

    Args:
        model: Module invoked as ``model(data)``. Its output is fed to
            ``F.nll_loss``, so it is presumably per-node log-probabilities
            (TODO confirm against the model definition).
        data: Object exposing ``y`` and a ``train_mask``; the mask indexes
            both the model output and the targets.
        optimizer: Optimizer whose gradients are cleared and then stepped.

    Returns:
        The loss over the masked (training) nodes as a Python float.
    """
    # Switch to training mode and drop gradients left over from earlier steps.
    model.train()
    optimizer.zero_grad()

    # Forward pass over the whole graph; only training nodes enter the loss.
    predictions = model(data)
    selected = data.train_mask
    step_loss = F.nll_loss(predictions[selected], data.y[selected])

    # Backward pass followed by the parameter update.
    step_loss.backward()
    optimizer.step()

    return step_loss.item()

In [4]:
def test(model, data):
    model.eval()  # Set the model to evaluation mode
    logits = model(data)
    pred = logits.argmax(dim=1)  # Get the predicted class for each node

    # Compute accuracy on the train, validation, and test sets
    train_correct = pred[data.train_mask] == data.y[data.train_mask]
    val_correct = pred[data.val_mask] == data.y[data.val_mask]
    test_correct = pred[data.test_mask] == data.y[data.test_mask]

    train_acc = int(train_correct.sum()) / int(data.train_mask.sum())
    val_acc = int(val_correct.sum()) / int(data.val_mask.sum())
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())

    return train_acc, val_acc, test_acc

In [7]:
# Load the pre-built graph (node features and labels attached) from disk.
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
graph_path = '../data/processed/balance_16k/model_input/graph_with_features_and_labels.gpickle'
with open(graph_path, 'rb') as graph_file:
    graph = pickle.load(graph_file)

In [8]:
# Sanity check: display the concrete class of the object that was unpickled.
type(graph)

networkx.classes.graph.Graph

In [10]:
# Convert the networkx graph into a PyTorch Geometric data object.
# NOTE(review): the recorded output of this cell shows a warning raised at
# `torch.as_tensor(value)` inside the converter - presumably triggered by one
# of the node attributes; confirm whether it matters for this graph.
data = from_networkx(graph)

  data_dict[key] = torch.as_tensor(value)


In [16]:
# Replace each node's label container with its first element so that the
# 'label' attribute holds the bare label string used by the label mapping.
# A label that is already a string is left untouched: indexing a string with
# [0] keeps only its first character (e.g. "Pn" -> "P"), so without this guard
# re-running the cell would silently corrupt the labels.
for i in range(graph.number_of_nodes()):
    node_label = graph.nodes[i]['label']
    if not isinstance(node_label, str):
        graph.nodes[i]['label'] = node_label[0]

In [18]:
# Map every string label to the integer class id used as the training target.
label_mapping = {
    "Pn": 0,
    "S": 1,
    "Lg": 2,
    "Sn": 3,
    "P": 4,
    "Pg": 5,
}

node_count = graph.number_of_nodes()

# Convert string labels to integers using the mapping
# (a label missing from the mapping raises KeyError here).
numeric_labels = [label_mapping[graph.nodes[i]['label']] for i in range(node_count)]

# Stack the per-node feature vectors into a single float32 array first and
# wrap that array as a tensor. Building a tensor directly from a Python list
# of per-node vectors is much slower for a matrix of this size.
# NOTE(review): assumes every node has a 'features' vector of the same length.
feature_matrix = np.asarray(
    [graph.nodes[i]['features'] for i in range(node_count)],
    dtype=np.float32,
)
data.x = torch.from_numpy(feature_matrix)
data.y = torch.tensor(numeric_labels, dtype=torch.long)

# Verify the shapes of the features and labels
print(f"Node features shape: {data.x.shape}")  # Should be (num_nodes, num_features)
print(f"Node labels shape: {data.y.shape}")    # Should be (num_nodes,)

Node features shape: torch.Size([16303, 7203])
Node labels shape: torch.Size([16303])


In [20]:
# Number of nodes in the graph
num_nodes = data.num_nodes

# Create an array of node indices (from 0 to num_nodes-1)
node_indices = np.arange(num_nodes)

# Step 1: hold out 20% of the nodes (test_size=0.2), leaving 80% for training
train_indices, temp_indices = train_test_split(node_indices, test_size=0.2, random_state=42)

# Step 2: split the held-out 20% in half -> 10% validation and 10% test
val_indices, test_indices = train_test_split(temp_indices, test_size=0.5, random_state=42)

# The three index sets together must account for every node exactly once
assert len(train_indices) + len(val_indices) + len(test_indices) == num_nodes

# Check the number of nodes in each set
print(f"Train nodes: {len(train_indices)}, Validation nodes: {len(val_indices)}, Test nodes: {len(test_indices)}")

Train nodes: 13042, Validation nodes: 1630, Test nodes: 1631


In [21]:
# Start from three all-False boolean masks, one entry per node.
n_total = data.num_nodes
train_mask, val_mask, test_mask = (
    torch.zeros(n_total, dtype=torch.bool) for _ in range(3)
)

# Switch on the entries that belong to each split.
for mask, members in (
    (train_mask, train_indices),
    (val_mask, val_indices),
    (test_mask, test_indices),
):
    mask[members] = True

# Attach the masks to the data object.
data.train_mask, data.val_mask, data.test_mask = train_mask, val_mask, test_mask

In [25]:
# Initialize the GNN model: one input channel per node feature and one output
# channel per class in label_mapping.
model = SeismicGNN(in_channels=data.x.shape[1], hidden_channels=64, out_channels=len(label_mapping))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# NOTE(review): the recorded run of this cell ended in a CUDA out-of-memory
# error (a single 22.18 GiB allocation on a GPU with 12 GiB in total). The
# traceback is not preserved, so where the allocation happens is unknown -
# TODO investigate the memory footprint before re-running on this GPU.
data = data.to(device)
model = model.to(device)

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Train the model
for epoch in range(200):
    # One full-batch optimisation step; train() switches the model to training
    # mode, clears gradients, backpropagates and returns the loss as a float.
    loss = train(model, data, optimizer)

    # Validation and testing; test() switches the model to evaluation mode.
    with torch.no_grad():
        train_acc, val_acc, test_acc = test(model, data)
        print(f'Epoch {epoch}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')


Using device: cuda


OutOfMemoryError: CUDA out of memory. Tried to allocate 22.18 GiB. GPU 0 has a total capacity of 12.00 GiB of which 0 bytes is free. Of the allocated memory 27.32 GiB is allocated by PyTorch, and 1.05 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [23]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Report the selected device (shows 'cuda' when a GPU is available).
print(f"Using device: {device}")

Using device: cuda
