In [1]:
import datatable as dt

In [2]:
biogrid_mv = dt.fread("data/BIOGRID-MV-Physical-3.5.187.tab3.txt",  header=True,)

In [3]:
with open(file="data/BIOGRID-MV-Physical-3.5.187.tab3.txt") as fi:
  ww = fi.readline().rstrip().split("\t")
biogrid_mv.names = ww



In [4]:
biogrid_mv_human = biogrid_mv[ list(map(lambda x,y: (x==9606)&(y==9606), biogrid_mv["Organism Interactor A"].to_list()[0], biogrid_mv["Organism Interactor B"].to_list()[0])),:]

In [5]:
edges = list(set(list(zip(biogrid_mv_human["Official Symbol Interactor A"].to_list()[0], biogrid_mv_human["Official Symbol Interactor B"].to_list()[0]))))

In [6]:
edges = list(set(list(map(lambda x: tuple(sorted(x)), edges))))

In [7]:
edges_zipped = list(zip(*edges))

In [8]:
all_nodes = sorted(set.union(set(edges_zipped[0]), set(edges_zipped[1])))

In [9]:
import torch
from torch_geometric.data import Data

In [10]:
node2ind = dict(zip(all_nodes, range(len(all_nodes))))

In [11]:
edges_zipped_indices = [list(map(lambda x: node2ind[x], edges_zipped[0])),
                        list(map(lambda x: node2ind[x], edges_zipped[0]))]


In [56]:
edge_index = torch.tensor(edges_zipped_indices, dtype=torch.long)
edge_index.dtype


torch.int64

In [57]:
from numpy import random
x = torch.tensor([random.randint(-1,2,size=len(all_nodes)),
     random.randint(-1,2,size=len(all_nodes)),
     random.randint(-1,2,size=len(all_nodes)),
     random.randint(-1,2,size=len(all_nodes))], dtype=torch.float).t()
y = torch.tensor(random.randint(0,4,len(all_nodes)), dtype=torch.long)
train_mask = torch.tensor(random.random_sample(size=len(all_nodes)) > .8, dtype=torch.bool)


In [58]:
mydata = Data(edge_index=edge_index, x = x, y = y, train_mask = train_mask, num_classes = 4)

In [59]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(mydata.num_node_features, 2)
        self.conv2 = GCNConv(2, mydata.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        import pdb
        # pdb.set_trace()

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)

In [60]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
mydata = mydata.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(mydata)
    loss = F.nll_loss(out[mydata.train_mask], mydata.y[mydata.train_mask])
    loss.backward()
    optimizer.step()

In [63]:
model.eval()
a, pred = model(mydata).max(dim=1)

In [67]:
data = mydata
correct = int(pred[torch.logical_not(data.train_mask)].eq(data.y[torch.logical_not(data.train_mask)]).sum().item())

In [71]:
acc = correct / int(torch.logical_not(data.train_mask).sum())
acc

0.2498763600395648

In [50]:
from torch_geometric.datasets import Planetoid

planetset = Planetoid(root='/tmp/Cora', name='Cora')

planets = planetset[0]

In [55]:
planets.edge_index.dtype

torch.int64