In [35]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import torch
#from torch_geometric.data import Dataset, Data

In [36]:
class MetamatDataset(torch.utils.data.Dataset):
    """Map-style dataset that turns raw graph records into tensors.

    Each record is a dictionary with the keys 'Nodal positions', 'Edge index'
    and 'Young'; the latter is used as the regression label.
    """

    def __init__(self, data_list):
        """
        Custom dataset for 3D graph data with 'Young' as the label.

        Args:
            data_list (list): List of dictionaries containing 'Nodal positions', 'Edge index', and 'Young'.
        """
        self.data_list = data_list

    def __len__(self):
        """Return the number of graph records in the dataset."""
        return len(self.data_list)

    @staticmethod
    def _drop_singleton_dims(tensor):
        """Squeeze size-1 axes without losing the row axis of a matrix.

        A plain ``squeeze()`` turns a one-node / one-edge graph (e.g. shape
        ``(1, 1, 2)`` or ``(1, 2)``) into a 1-D vector, after which ``.t()`` is
        a no-op and downstream row indexing breaks. Whenever squeezing a
        matrix-like input would leave fewer than two dimensions, the input is
        reshaped to ``(rows, last_dim)`` instead. All other inputs get exactly
        the result of ``squeeze()``.
        """
        squeezed = tensor.squeeze()
        if squeezed.dim() < 2 and tensor.dim() >= 2 and tensor.numel() > 0:
            squeezed = tensor.reshape(-1, tensor.shape[-1])
        return squeezed

    def __getitem__(self, idx):
        """
        Get a single graph data object.

        Args:
            idx (int): Index of the data to retrieve.

        Returns:
            dict: Dictionary with
                'x' (float tensor): nodal positions with singleton axes removed,
                'edge_index' (long tensor): edge list transposed so that row 0 and
                    row 1 hold the two endpoints of every edge,
                'y' (float tensor): the 'Young' entry, converted as-is.
        """
        entry = self.data_list[idx]
        nodal_positions = self._drop_singleton_dims(
            torch.tensor(entry['Nodal positions'], dtype=torch.float)
        )
        # NOTE(review): assumes the raw edge list is stored edge-major, i.e.
        # (..., num_edges, 2), so the transpose yields (2, num_edges) -- confirm.
        edge_index = self._drop_singleton_dims(
            torch.tensor(entry['Edge index'], dtype=torch.long)
        ).t().contiguous()
        young = torch.tensor(entry['Young'], dtype=torch.float)

        return {
            'x': nodal_positions,
            'edge_index': edge_index,
            'y': young
        }

In [37]:
import torch.nn as nn
import torch

class SimpleGCNLayer(nn.Module):
    """Dense graph-convolution layer: ``D^-1/2 (A + I) D^-1/2 @ (x W)``.

    ``A`` is built from ``edge_index`` on every call, ``I`` adds self-loops and
    ``D`` is the row-sum degree of ``A + I``. The linear map ``W`` has no bias.
    """

    def __init__(self, in_features, out_features):
        """
        Args:
            in_features (int): Size of each input node feature vector.
            out_features (int): Size of each output node feature vector.
        """
        super().__init__()
        self.linear = nn.Linear(in_features, out_features, bias=False)

    def forward(self, x, edge_index):
        """Propagate node features over the graph.

        Args:
            x (Tensor): Node features; ``x.size(0)`` is taken as the node count.
            edge_index (Tensor): Integer tensor whose row 0 / row 1 hold the
                source / target node of every edge. Edges are used as given
                (``adj[src, dst] = 1``); the reverse direction is NOT added.

        Returns:
            Tensor: Aggregated node features of shape ``(num_nodes, out_features)``.
        """
        num_nodes = x.size(0)
        # Build the adjacency in the dtype of the features so the layer also
        # works after ``model.double()`` / ``model.half()`` (a float32 adjacency
        # would make the final matmul fail with a dtype mismatch).
        adj = torch.zeros((num_nodes, num_nodes), dtype=x.dtype, device=x.device)
        adj[edge_index[0], edge_index[1]] = 1
        # Add self-loops
        adj += torch.eye(num_nodes, dtype=x.dtype, device=x.device)
        # Every row contains its self-loop, so deg >= 1 and the inverse square
        # root is always finite -- no inf masking is required.
        deg_inv_sqrt = adj.sum(dim=1).pow(-0.5)
        # Row/column scaling by broadcasting; equal to D^-1/2 @ adj @ D^-1/2
        # without materialising dense diagonal matrices or two N x N matmuls.
        adj_norm = deg_inv_sqrt.unsqueeze(1) * adj * deg_inv_sqrt.unsqueeze(0)
        return adj_norm @ self.linear(x)

class PredictorGNN(nn.Module):
    """Graph-level regressor: two GCN layers, mean pooling, one linear head."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """
        Args:
            input_dim (int): Number of features per node.
            hidden_dim (int): Width of both graph-convolution layers.
            output_dim (int): Size of the per-graph prediction vector.
        """
        super().__init__()
        self.gcn1 = SimpleGCNLayer(input_dim, hidden_dim)
        self.gcn2 = SimpleGCNLayer(hidden_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, adj):
        """Predict one output vector for a single graph.

        Args:
            x (Tensor): Node features of one graph.
            adj (Tensor): Forwarded unchanged as the second argument of both
                GCN layers, i.e. it is used as their ``edge_index`` input.

        Returns:
            Tensor: Output of the linear head applied to the node-averaged features.
        """
        hidden = x
        # Two rounds of message passing, each followed by a ReLU
        for conv in (self.gcn1, self.gcn2):
            hidden = conv(hidden, adj).relu()
        # Global mean pooling over the node axis
        pooled = hidden.mean(dim=0)
        return self.fc(pooled)

In [38]:
def read_pkl_file(file_path):
    """Load and return the object stored in the pickle file at ``file_path``.

    NOTE(review): ``pickle.load`` can execute arbitrary code embedded in the
    file, so only use this on files from a trusted source.
    """
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)

# Load the raw records and keep only those that carry every field the dataset reads.
ini_data = [
    record
    for record in read_pkl_file("/home/wzhan24/MetaMatDiff/datacreate/data.pkl")
    if all(key in record for key in ('Young', 'Nodal positions', 'Edge index'))
]

In [39]:
from torch.utils.data import DataLoader
dataset = MetamatDataset(ini_data)

# Split dataset into train, validation, and test sets (60% / 20% / remainder)
train_size = int(0.6 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size

# Seed the split so that "Restart & Run All" always yields the same
# train/validation/test membership (an unseeded random_split changes per run).
SPLIT_SEED = 42
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

batch_size = 1  # Use 1 for graph data unless you implement batching

# The training loader gets its own seeded generator so the shuffling order is
# reproducible as well; validation/test loaders iterate in a fixed order.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Train dataset size: 10333
Validation dataset size: 3444
Test dataset size: 3445


In [40]:
# Inspect the edge-index tensor of the first graph in the training split.
# Kept as the cell's last expression so the notebook displays the tensor.
train_dataset[0]['edge_index']

tensor([[  1,   1,  20,  10,   4,   2,  32,  32,  62,  61,  59,  22,  21,  35,
          40,  72,  61,  16,  60,  60,  90,  41,  71,  63,   4,  20,  10,  24,
          44,   2,  26,  11,  67,  56,  16,  22,  73,  17,  21,  70,  34,  84,
          55,  72,  45,  71,  85,  69,   7,  24,   4,  50,  22,  11,  14,  47,
          46,  73,  21,  70,  80,  75,  76,  71,   1,  10,  22,   4,   2,  23,
          11,  59,  21,  22,  35,  17,  70,  16,  70,  90,  25,  19,  71,  74,
           0,  27,  43,  30,   7,  56,  52,  26,  14,  47,  68,  34,  75,  38,
          84,  53,  54,  45,  83,  77,  18,  82,  86,  69,  27,   6,   7,  28,
          44,  48,   3,  52,  67,  14,  47,  78,  46,   5,  13,  53,  75,  83,
          36,  76,  77,  79,  49,  82,  43,   9,  50,  48,  15,  68,  78,  46,
          47,  80,  75,  76,   1,   2,   0,   3,   4,  10, 102,  11,  16, 106,
           6,   9,   3,  13,  12,   8,   5,  15, 116,   7,   6,  13,  14, 122,
          17,  19,   8,  18,  20,  24,   4,  22,  23

In [44]:
# Initialize the model, optimizer, and loss function
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Move the model to the selected device (a hard-coded 'cuda' would crash on
# CPU-only machines and ignore the fallback chosen above).
model = PredictorGNN(input_dim=3, hidden_dim=64, output_dim=3).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Training loop with validation
def _mean_graph_loss(model, batch, criterion, device):
    """Return the loss averaged over every graph contained in ``batch``.

    ``batch`` is a dict with 'x', 'edge_index' and 'y' tensors whose first axis
    indexes the graphs of the batch. The model is called once per graph.
    """
    x = batch['x'].to(device)
    edge_index = batch['edge_index'].to(device)
    y = batch['y'].to(device)
    per_graph = [
        criterion(model(x[i], edge_index[i]).squeeze(), y[i].squeeze())
        for i in range(x.size(0))
    ]
    # With a single graph per batch this is exactly that graph's loss.
    return torch.stack(per_graph).mean()


# Training loop with validation
def train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=10, val_every=2):
    """Train ``model`` and periodically report the validation loss.

    Args:
        model: Module called as ``model(x, edge_index)`` for one graph at a time.
        train_loader: Iterable (with ``len``) of dict batches holding 'x',
            'edge_index' and 'y' tensors with a leading batch axis.
        val_loader: Same structure as ``train_loader``; used without gradients.
        optimizer: Optimizer stepped once per training batch.
        criterion: Loss called as ``criterion(prediction, target)``.
        device: Device the batch tensors are moved to before the forward pass.
        epochs (int): Number of passes over ``train_loader``.
        val_every (int): Validate after every ``val_every``-th epoch; a falsy
            value disables validation.

    Every graph of a batch contributes to the loss (previously only the first
    graph of each batch was used). Average losses are printed per epoch; an
    empty loader reports 0 instead of raising ``ZeroDivisionError``.
    The model is left in training mode.
    """
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            loss = _mean_graph_loss(model, batch, criterion, device)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/max(len(train_loader), 1):.8f}")

        # Validate every `val_every` epochs
        if val_every and (epoch + 1) % val_every == 0:
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for batch in val_loader:
                    val_loss += _mean_graph_loss(model, batch, criterion, device).item()
            print(f"Validation Loss after Epoch {epoch+1}: {val_loss/max(len(val_loader), 1):.8f}")
            model.train()

# Train the model with the default number of epochs (epochs=10);
# train_model prints the average training/validation losses as it runs.
train_model(model, train_loader, val_loader, optimizer, criterion, device)

Epoch 1/10, Loss: 0.00001884
Epoch 2/10, Loss: 0.00000125
Validation Loss after Epoch 2: 0.00000206
Epoch 3/10, Loss: 0.00000118
Epoch 4/10, Loss: 0.00000119
Validation Loss after Epoch 4: 0.00000085
Epoch 5/10, Loss: 0.00000115
Epoch 6/10, Loss: 0.00000115
Validation Loss after Epoch 6: 0.00000100
Epoch 7/10, Loss: 0.00000115
Epoch 8/10, Loss: 0.00000115
Validation Loss after Epoch 8: 0.00000112
Epoch 9/10, Loss: 0.00000113
Epoch 10/10, Loss: 0.00000112
Validation Loss after Epoch 10: 0.00000148
