In [1]:
from torch_geometric.datasets import IMDB
from torch_geometric.nn import GATv2Conv
from torch_sparse import SparseTensor
import torch
from torch.nn import Embedding,Sequential, Linear, ReLU, BatchNorm1d,Dropout,LeakyReLU,CrossEntropyLoss
from torch import nn
from torch.optim import Adam, SGD
import copy
from sklearn.metrics import f1_score

In [2]:
data = IMDB('./')[0]

Target is Movie level predictions, as one of three classes (Action, Comedy, and Drama).

In [3]:
torch.unique(data['movie'].y)

tensor([0, 1, 2])

In [3]:
idx = torch.rand(4278)<0.8
train_idx = torch.where(idx)[0]
val_idx = torch.where(~idx)[0]

y = data['movie'].y

In [5]:
data

HeteroData(
  [1mmovie[0m={
    x=[4278, 3066],
    y=[4278],
    train_mask=[4278],
    val_mask=[4278],
    test_mask=[4278]
  },
  [1mdirector[0m={ x=[2081, 3066] },
  [1mactor[0m={ x=[5257, 3066] },
  [1m(movie, to, director)[0m={ edge_index=[2, 4278] },
  [1m(movie, to, actor)[0m={ edge_index=[2, 12828] },
  [1m(director, to, movie)[0m={ edge_index=[2, 4278] },
  [1m(actor, to, movie)[0m={ edge_index=[2, 12828] }
)

### model-1, concat type features
Concat node and edge together. Only possible when dimensions are the same across types

In [4]:
homo_data = data.to_homogeneous()

In [5]:
homo_data

Data(edge_index=[2, 34212], x=[11616, 3066], node_type=[11616], edge_type=[34212])

In [8]:
torch.all(homo_data.x[:4278]==data['movie'].x)

tensor(True)

In [6]:
layers = 8
in_d = 3066
d = 128
d_type = 256
out_d = 3
node_embed_types = 3
edge_embed_types = 4
heads = 4
dropout = 0.1

epochs = 10

In [15]:
class GAT_block(torch.nn.Module):
    def __init__(self,d,d_type,heads,dropout,multiple_factor=2):
        super(GAT_block, self).__init__()   
        self.v_update =  Sequential(Dropout(dropout),
                                    BatchNorm1d(d),
                                    Linear(d,d*multiple_factor),
                                    LeakyReLU(inplace=True),
                                    Dropout(dropout),
                                    BatchNorm1d(d*multiple_factor),
                                    Linear(d*multiple_factor,d),
                                    LeakyReLU(inplace=True))      
        self.conv = GATv2Conv(d,d//heads,heads,edge_dim=d_type,dropout=dropout)
    
    def forward(self, x, edge_index, edge_attr):
        x_new = self.conv(x, edge_index, edge_attr)
        x_new = self.v_update(x_new)
        return x+x_new
    
    def __repr__(self):
        return 'GAT_block'  

class GNN1(torch.nn.Module):
    def __init__(self,layers,d,d_type,out_d,node_embed_types,
                 edge_embed_types,heads,dropout,multiple_factor=2):
        super(GNN1, self).__init__()
        self.node_type_embed = Embedding(node_embed_types, d_type)
        self.edge_type_embed = Embedding(edge_embed_types, d_type)
        self.input_linear = Sequential(BatchNorm1d(in_d+d_type),
                                        Linear(in_d+d_type,d*2),
                                        LeakyReLU(inplace=True),
                                        BatchNorm1d(d*2),
                                        Linear(d*2,d),
                                        LeakyReLU(inplace=True))
        self.conv = nn.ModuleList([GAT_block(d,d_type,heads,dropout,multiple_factor) for _ in range(layers)])
        self.out_linear = Sequential(BatchNorm1d(d),
                                        Linear(d,d*2),
                                        LeakyReLU(inplace=True),
                                        BatchNorm1d(d*2),
                                        Linear(d*2,out_d))
        self.loss = CrossEntropyLoss()
        self.register_buffer('train_idx', train_idx)
        self.register_buffer('val_idx',val_idx)
        self.register_buffer('train_y', y[train_idx])
        self.register_buffer('val_y', y[val_idx])
        
    def forward(self, x, edge_index, node_type,edge_type,IsTrain=False):
        node_embed = self.node_type_embed(node_type)
        edge_embed = self.edge_type_embed(edge_type)
        x = torch.cat([x,node_embed],1)
        x = self.input_linear(x)
        for conv in self.conv:
            x = conv(x, edge_index, edge_embed)
        x = x[:4278] # take only the movie nodes
        x = self.out_linear(x)
        if IsTrain:
            train_loss = self.loss(x[self.train_idx],self.train_y)
            yhat = x[self.val_idx].detach().cpu().numpy().argmax(1)
            val_loss = f1_score(y.numpy()[val_idx],yhat,average='micro')
            return train_loss,val_loss
        else:
            return x        

In [16]:
model = GNN1(layers,d,d_type,out_d,node_embed_types,
                 edge_embed_types,heads,dropout)
model = model.to('cuda')
homo_data = homo_data.to('cuda')

In [17]:
# train
opt = Adam(model.parameters())
increase_count = 1
lossBest = -1
count = 0
opt.zero_grad()
for epoch in range(epochs):
    model.train()
    train_loss,val_loss = model(homo_data.x,homo_data.edge_index,homo_data.node_type,homo_data.edge_type,True)
    print("epoch:{}, train:{}, F1-val:{}".format(epoch,train_loss,val_loss))
    if val_loss>lossBest:
        lossBest = val_loss
        bestWeight = copy.deepcopy(model.state_dict())
        count = 0
    else:
        count += 1
        if count > increase_count:
            model.load_state_dict(bestWeight)
            break
    train_loss.backward()
    opt.step()
    opt.zero_grad()

epoch:0, train:1.1813830137252808, F1-val:0.35878787878787877
epoch:1, train:1.06571364402771, F1-val:0.4109090909090909
epoch:2, train:0.9005047678947449, F1-val:0.4484848484848485
epoch:3, train:0.7641838192939758, F1-val:0.5272727272727272
epoch:4, train:0.5908159017562866, F1-val:0.5915151515151515
epoch:5, train:0.44413045048713684, F1-val:0.64
epoch:6, train:0.3197781443595886, F1-val:0.6472727272727272
epoch:7, train:0.23818936944007874, F1-val:0.6787878787878788
epoch:8, train:0.16274069249629974, F1-val:0.6836363636363636
epoch:9, train:0.11465346068143845, F1-val:0.6715151515151515


In [18]:
# eval
model.eval()
with torch.no_grad():
    yhat = model(homo_data.x,homo_data.edge_index,homo_data.node_type,homo_data.edge_type)
yhat = yhat.detach().cpu().numpy().argmax(1)

'micro:{}, macro:{}'.format(f1_score(y.numpy()[val_idx],yhat[val_idx],average='micro'),
                            f1_score(y.numpy()[val_idx],yhat[val_idx],average='macro'))

'micro:0.39151515151515154, macro:0.1988953866146849'

### Model-2
Separate msg passing for each edge type

In [20]:
# hyper-parameter
layers=5
d=128
in_d=3066
out_d=3
dropout=0.1
multiple_factor=2

epochs = 10

In [21]:
class bipartite_msg(torch.nn.Module):
    def __init__(self,d,dropout,multiple_factor):
        super(bipartite_msg, self).__init__()   
        self.in_linear = Linear(d,d)
        self.out_linear = Sequential(Dropout(dropout),
                                    BatchNorm1d(2*d),
                                    Linear(2*d,multiple_factor*d),
                                    LeakyReLU(inplace=True),
                                    Dropout(dropout),
                                    BatchNorm1d(multiple_factor*d),
                                    Linear(multiple_factor*d,d),
                                    LeakyReLU(inplace=True))   
    
    def forward(self, M, N, adj_m2n):
        # message from M to N
        # adj_m2n is SparseTensor of shape (n,m)
        M = self.in_linear(M)
        M2N = adj_m2n.matmul(M)
        N_new = self.out_linear(torch.concat([N,M2N],1))
        return N + N_new

class bipartite_block(torch.nn.Module):
    def __init__(self,d,dropout,multiple_factor):
        super(bipartite_block, self).__init__()   
        self.m2a = bipartite_msg(d,dropout,multiple_factor)
        self.m2d = bipartite_msg(d,dropout,multiple_factor)
        self.d2m = bipartite_msg(d,dropout,multiple_factor)
        self.a2m = bipartite_msg(d,dropout,multiple_factor)
    
    def forward(self, M, A, D, adj_m2a, adj_m2d, adj_d2m, adj_a2m):
        A = self.m2a(M,A,adj_m2a)
        D = self.m2d(M,D,adj_m2d)
        M = self.d2m(D,M,adj_d2m)
        M = self.a2m(A,M,adj_a2m)
        return M, A, D

def MLP(in_d,out_d,multiple_factor):
    return Sequential(BatchNorm1d(in_d),
                        Linear(in_d,in_d*multiple_factor),
                        LeakyReLU(inplace=True),
                        BatchNorm1d(in_d*multiple_factor),
                        Linear(in_d*multiple_factor,out_d),
                        LeakyReLU(inplace=True))

class GNN2(torch.nn.Module):
    def __init__(self,layers,d,in_d,out_d,
                 dropout,multiple_factor=2):
        super(GNN2, self).__init__()

        self.input_linear_A = MLP(in_d,d,multiple_factor)
        self.input_linear_D = MLP(in_d,d,multiple_factor)
        self.input_linear_M = MLP(in_d,d,multiple_factor)
        self.conv = nn.ModuleList([bipartite_block(d,dropout,multiple_factor) for _ in range(layers)])
        self.out_linear = MLP(d,out_d,multiple_factor)

        self.loss = CrossEntropyLoss()
        self.register_buffer('train_idx', train_idx)
        self.register_buffer('val_idx',val_idx)
        self.register_buffer('train_y', y[train_idx])
        self.register_buffer('val_y', y[val_idx])
        
    def forward(self,M,A,D,adj_m2d,adj_m2a,adj_d2m,adj_a2m,IsTrain=False):
        M = self.input_linear_M(M)
        A = self.input_linear_A(A)
        D = self.input_linear_D(D)
        for conv in self.conv:
            M, A, D = conv(M, A, D, adj_m2a, adj_m2d, adj_d2m, adj_a2m)
        x = self.out_linear(M)
        if IsTrain:
            train_loss = self.loss(x[self.train_idx],self.train_y)
            model.eval()
            with torch.no_grad():
                val_loss = self.loss(x[self.val_idx],self.val_y)
            model.train()
            return train_loss,val_loss
        else:
            return x

In [22]:
model = GNN2(layers,d,in_d,out_d,
                 dropout,multiple_factor).to('cuda')

In [23]:
# data
M,A,D = data['movie'].x, data['actor'].x, data['director'].x

m2d_idx = data['movie', 'to', 'director']['edge_index']
m2a_idx = data['movie', 'to', 'actor']['edge_index']
d2m_idx = data['director', 'to', 'movie']['edge_index']
a2m_idx = data['actor', 'to', 'movie']['edge_index']

adj_m2d = SparseTensor(row=m2d_idx[1],col=m2d_idx[0])
adj_m2a = SparseTensor(row=m2a_idx[1],col=m2a_idx[0])
adj_d2m = SparseTensor(row=d2m_idx[1],col=d2m_idx[0])
adj_a2m = SparseTensor(row=a2m_idx[1],col=a2m_idx[0])

M,A,D,adj_m2d,adj_m2a,adj_d2m,adj_a2m = [d.to('cuda') for d in [M,A,D,adj_m2d,adj_m2a,adj_d2m,adj_a2m]]

In [24]:
# train
opt = Adam(model.parameters())
increase_count = 1
lossBest = 1e6
count = 0
opt.zero_grad()
for epoch in range(epochs):
    model.train()
    train_loss,val_loss = model(M,A,D,adj_m2d,adj_m2a,adj_d2m,adj_a2m,True)
    print("epoch:{}, train:{}, val:{}".format(epoch,train_loss,val_loss))
    if val_loss<lossBest:
        lossBest = val_loss
        bestWeight = copy.deepcopy(model.state_dict())
        count = 0
    else:
        count += 1
        if count > increase_count:
            model.load_state_dict(bestWeight)
            break
    train_loss.backward()
    opt.step()
    opt.zero_grad()

epoch:0, train:1.124171257019043, val:1.1480956077575684
epoch:1, train:0.9661462306976318, val:1.074554443359375
epoch:2, train:0.7229357957839966, val:0.9850589632987976
epoch:3, train:0.49748778343200684, val:0.9109905958175659
epoch:4, train:0.33336037397384644, val:0.8700106739997864
epoch:5, train:0.23222337663173676, val:0.8673908114433289
epoch:6, train:0.1734304130077362, val:0.8900783658027649
epoch:7, train:0.13900938630104065, val:0.8993144631385803


In [25]:
# eval
model.eval()
with torch.no_grad():
    yhat = model(M,A,D,adj_m2d,adj_m2a,adj_d2m,adj_a2m)
yhat = yhat.detach().cpu().numpy().argmax(1)

'micro:{}, macro:{}'.format(f1_score(y.numpy()[val_idx],yhat[val_idx],average='micro'),
                            f1_score(y.numpy()[val_idx],yhat[val_idx],average='macro'))

'micro:0.48531139835487663, macro:0.4075957051873604'