In [422]:
import gzip
import json
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
import torch
from torch_geometric.data import Data
from torch_geometric.utils import from_scipy_sparse_matrix
from torch_geometric.transforms import RandomNodeSplit
from sklearn.model_selection import train_test_split
from torch_geometric.nn import GCNConv
from torch.nn import Linear, ModuleList


In [70]:
with gzip.open('xbar/1/xbar.json.gz','rb') as f:
    design = json.loads(f.read().decode('utf-8'))

instances = pd.DataFrame(design['instances'])
nets = pd.DataFrame(design['nets'])

conn=np.load('xbar/1/xbar_connectivity.npz')
A = coo_matrix((conn['data'], (conn['row'], conn['col'])), shape=conn['shape'])
A = A.__mul__(A.T)

In [27]:
def buildBST(array,start=0,finish=-1):
    if finish<0:
        finish = len(array)
    mid = (start + finish) // 2
    if mid-start==1:
        ltl=start
    else:
        ltl=buildBST(array,start,mid)
    
    if finish-mid==1:
        gtl=mid
    else:
        gtl=buildBST(array,mid,finish)
        
    return((array[mid],ltl,gtl))

congestion_data = np.load('xbar/1/xbar_congestion.npz')
xbst=buildBST(congestion_data['xBoundaryList'])
ybst=buildBST(congestion_data['yBoundaryList'])
demand = np.zeros(shape = [instances.shape[0],])

In [28]:
def getGRCIndex(x,y,xbst,ybst):
    while (type(xbst)==tuple):
        if x < xbst[0]:
            xbst=xbst[1]
        else:
            xbst=xbst[2]
            
    while (type(ybst)==tuple):
        if y < ybst[0]:
            ybst=ybst[1]
        else:
            ybst=ybst[2]
            
    return ybst, xbst


for k in range(instances.shape[0]):
    # print(k)
    xloc = instances.iloc[k]['xloc']; yloc = instances.iloc[k]['yloc']
    i,j=getGRCIndex(xloc,yloc,xbst,ybst)
    d = 0 
    for l in list(congestion_data['layerList']): 
        lyr=list(congestion_data['layerList']).index(l)
        d += congestion_data['demand'][lyr][i][j]
    demand[k] = d
        
instances['routing_demand'] = demand

## Creating the test train split

In [390]:
torch.manual_seed(42) # for replication

X = torch.tensor(instances[['xloc', 'yloc', 'cell', 'orient']].values) # 4 features
y = torch.tensor(instances['routing_demand'].values) # y value

# getting edge index for message passing
ei = from_scipy_sparse_matrix(A)
edge_index = ei[0]

data = Data(x=X, edge_index=edge_index, y=y)

# Use RandomNodeSplit to split nodes randomly
split = RandomNodeSplit(num_splits=1, num_val=0.0, num_test=0.3)
split_data = split(data)

# we need to split edge_index based off how node was split
train = torch.argwhere(split_data.train_mask).reshape(-1)
test = torch.argwhere(split_data.test_mask).reshape(-1)
train_mapping = {a.item():b for a,b in zip(train, range(train.shape[0]))}
test_mapping = {a.item():b for a,b in zip(test, range(test.shape[0]))}
X_train = split_data.x[split_data.train_mask].float()
X_test = split_data.x[split_data.test_mask].float()
y_train = split_data.y[split_data.train_mask].float()
y_test = split_data.y[split_data.test_mask].float()
edge_index_train = torch.tensor([train_mapping[i.item()] for i in edge_index[:,torch.argwhere(torch.all(torch.isin(edge_index, torch.argwhere(split_data.train_mask).reshape(-1)), dim=0)).reshape(-1)].reshape(-1)]).reshape(2, edge_index[:,torch.argwhere(torch.all(torch.isin(edge_index, torch.argwhere(split_data.train_mask).reshape(-1)), dim=0)).reshape(-1)].shape[1])
edge_index_test = torch.tensor([test_mapping[i.item()] for i in edge_index[:,torch.argwhere(torch.all(torch.isin(edge_index, torch.argwhere(split_data.test_mask).reshape(-1)), dim=0)).reshape(-1)].reshape(-1)]).reshape(2, edge_index[:,torch.argwhere(torch.all(torch.isin(edge_index, torch.argwhere(split_data.test_mask).reshape(-1)), dim=0)).reshape(-1)].shape[1])
final_train = Data(x=X_train, edge_index=edge_index_train, y=y_train)
final_test = Data(x=X_test, edge_index=edge_index_test, y=y_test)

In [303]:
conv1 = GCNConv(4, 768)
conv1(split_data.x[split_data.train_mask].float(), torch.tensor([mapping[i.item()] for i in edge_index[:,torch.argwhere(torch.all(torch.isin(edge_index, torch.argwhere(split_data.train_mask).reshape(-1)), dim=0)).reshape(-1)].reshape(-1)]).reshape(2, edge_index[:,torch.argwhere(torch.all(torch.isin(edge_index, torch.argwhere(split_data.train_mask).reshape(-1)), dim=0)).reshape(-1)].shape[1])
).shape

torch.Size([2766, 768])

In [396]:
class GCN(torch.nn.Module):
    def __init__(self, num_hidden_layers):
        super().__init__()

        self.num_hidden_layers = num_hidden_layers
        self.gcn = GCNConv(4, 768)
        self.hidden_layers = ModuleList([GCNConv(768, 768) for _ in range(self.num_hidden_layers)])
        self.out = Linear(768, 1)

    def forward(self, x, edge_index):
        h = self.gcn(x, edge_index).relu()
        for _, l in enumerate(self.hidden_layers):
            h = l(h, edge_index).relu()
        z = self.out(h)
        return h, z

## Problem 1a

Below I set up a train function that takes in a model and a dataset

In [None]:
def train(model, data):
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.02)

    embeddings = []
    losses = []
    outputs = []

    # Training loop
    for epoch in range(200):
        optimizer.zero_grad()

        h, z = model(data.x, data.edge_index)

        loss = criterion(z.float(), data.y)

        loss.backward()

        optimizer.step()

        embeddings.append(h)
        losses.append(loss)

        outputs.append(z.argmax(dim=1))

        # Print metrics every 10 epochs
        if epoch % 10 == 0:
            print(f'Epoch {epoch:>3} | Loss: {loss:.2f}')


In [399]:
one_layer_model = GCN(0)
train(one_layer_model, final_train)


Epoch   0 | Loss: 1333273.75
Epoch  10 | Loss: 520175.38
Epoch  20 | Loss: 2042.67
Epoch  30 | Loss: 237.68
Epoch  40 | Loss: 618.19
Epoch  50 | Loss: 631.04
Epoch  60 | Loss: 525.16
Epoch  70 | Loss: 267.45
Epoch  80 | Loss: 203.47
Epoch  90 | Loss: 180.41
Epoch 100 | Loss: 161.00
Epoch 110 | Loss: 141.21
Epoch 120 | Loss: 121.49
Epoch 130 | Loss: 103.20
Epoch 140 | Loss: 83.89
Epoch 150 | Loss: 69.90
Epoch 160 | Loss: 63.53
Epoch 170 | Loss: 61.43
Epoch 180 | Loss: 60.96
Epoch 190 | Loss: 60.92


In [398]:
two_layer_model = GCN(1)
train(two_layer_model, final_train)

Epoch   0 | Loss: 11175.13
Epoch  10 | Loss: 25486240.00
Epoch  20 | Loss: 24455.21
Epoch  30 | Loss: 357154.22
Epoch  40 | Loss: 38370.19
Epoch  50 | Loss: 9734.55
Epoch  60 | Loss: 1827.83
Epoch  70 | Loss: 682.14
Epoch  80 | Loss: 623.60
Epoch  90 | Loss: 588.38
Epoch 100 | Loss: 491.25
Epoch 110 | Loss: 391.73
Epoch 120 | Loss: 322.60
Epoch 130 | Loss: 266.27
Epoch 140 | Loss: 213.38
Epoch 150 | Loss: 170.73
Epoch 160 | Loss: 137.79
Epoch 170 | Loss: 113.86
Epoch 180 | Loss: 97.98
Epoch 190 | Loss: 88.28


In [401]:
three_layer_model = GCN(2)
train(three_layer_model, final_train)

Epoch   0 | Loss: 7228.60
Epoch  10 | Loss: 164106912.00
Epoch  20 | Loss: 120014176.00
Epoch  30 | Loss: 38614256.00
Epoch  40 | Loss: 11552405.00
Epoch  50 | Loss: 4134028.75
Epoch  60 | Loss: 1049274.75
Epoch  70 | Loss: 75680.92
Epoch  80 | Loss: 51247.93
Epoch  90 | Loss: 93241.73
Epoch 100 | Loss: 14497.59
Epoch 110 | Loss: 13358.26
Epoch 120 | Loss: 7522.93
Epoch 130 | Loss: 6973.73
Epoch 140 | Loss: 5439.39
Epoch 150 | Loss: 5271.04
Epoch 160 | Loss: 4910.72
Epoch 170 | Loss: 4585.75
Epoch 180 | Loss: 4322.17
Epoch 190 | Loss: 4082.26


In [402]:
four_layer_model = GCN(3)
train(four_layer_model, final_train)

Epoch   0 | Loss: 356.64
Epoch  10 | Loss: 17905674240.00
Epoch  20 | Loss: 1307512.62
Epoch  30 | Loss: 984320.38
Epoch  40 | Loss: 234282.08
Epoch  50 | Loss: 15113.77
Epoch  60 | Loss: 13531.70
Epoch  70 | Loss: 6246.34
Epoch  80 | Loss: 5370.67
Epoch  90 | Loss: 3669.27
Epoch 100 | Loss: 3074.87
Epoch 110 | Loss: 198610.30
Epoch 120 | Loss: 4275910.00
Epoch 130 | Loss: 28751.39
Epoch 140 | Loss: 18302.13
Epoch 150 | Loss: 38596.46
Epoch 160 | Loss: 23993.80
Epoch 170 | Loss: 7438.18
Epoch 180 | Loss: 533.50
Epoch 190 | Loss: 864.96


In [403]:
five_layer_model = GCN(4)
train(five_layer_model, final_train)

Epoch   0 | Loss: 52133.25
Epoch  10 | Loss: 193060.41
Epoch  20 | Loss: 351514.22
Epoch  30 | Loss: 245.31
Epoch  40 | Loss: 151.35
Epoch  50 | Loss: 105.38
Epoch  60 | Loss: 74.18
Epoch  70 | Loss: 66.23
Epoch  80 | Loss: 62.79
Epoch  90 | Loss: 60.84
Epoch 100 | Loss: 59.33
Epoch 110 | Loss: 57.90
Epoch 120 | Loss: 56.43
Epoch 130 | Loss: 54.85
Epoch 140 | Loss: 53.11
Epoch 150 | Loss: 51.18
Epoch 160 | Loss: 49.04
Epoch 170 | Loss: 46.06
Epoch 180 | Loss: 42.44
Epoch 190 | Loss: 38.50


## Problem 1b Adding Attention

In [406]:
class AttentionGCN(torch.nn.Module):
    def __init__(self, num_hidden_layers):
        super(AttentionGCN, self).__init__()
        
        self.num_hidden_layers = num_hidden_layers

        # added attention
        self.attention = Linear(2 * 768, 1)

        self.num_hidden_layers = num_hidden_layers
        self.gcn = GCNConv(4, 768)
        self.hidden_layers = ModuleList([GCNConv(768, 768) for _ in range(self.num_hidden_layers)])
        self.out = Linear(768, 1)

    def forward(self, x, edge_index):
        h = self.gcn(x, edge_index).relu()

        # Self-attention mechanism
        attention_weights = torch.cat((h, h), dim=1)
        attention_weights = torch.tanh(self.attention(attention_weights))
        attention_weights = torch.nn.functional.softmax(attention_weights, dim=1)
        h = h * attention_weights

        for _, l in enumerate(self.hidden_layers):
            h = l(h, edge_index).relu()
        z = self.out(h)
        return h, z

In [407]:
attention_model = AttentionGCN(4)
train(attention_model, final_train)

Epoch   0 | Loss: 28520.75
Epoch  10 | Loss: 2151835.00
Epoch  20 | Loss: 1873.23
Epoch  30 | Loss: 644.09
Epoch  40 | Loss: 129.38
Epoch  50 | Loss: 68.72
Epoch  60 | Loss: 67.01
Epoch  70 | Loss: 63.55
Epoch  80 | Loss: 60.17
Epoch  90 | Loss: 56.18
Epoch 100 | Loss: 49.72
Epoch 110 | Loss: 47.32
Epoch 120 | Loss: 40.66
Epoch 130 | Loss: 36.06
Epoch 140 | Loss: 30.99
Epoch 150 | Loss: 29.23
Epoch 160 | Loss: 27.52
Epoch 170 | Loss: 27.16
Epoch 180 | Loss: 25.84
Epoch 190 | Loss: 24.93


## Using other xbar graphs for testing generalization

It seems like all the other xbar data was the same?
So I am just going to test on the test set to see how it generalizes

In [420]:
# Using attention model because it has the best

def test(model, data):
    return model(data.x, data.edge_index)

mse = torch.nn.MSELoss()
mse(attention_model(final_test.x, final_test.edge_index)[1].reshape(-1), final_test.y)


tensor(24.4887, grad_fn=<MseLossBackward0>)

This generalized pretty well considering the MSE loss is almost the same for train and test.  I think I could of trained the model a little more as the loss was still decreasing.