In [None]:
# Mount Google Drive at /content/gdrive so later cells can read and write files
# under that path. force_remount=True is passed so the mount is redone on re-run.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [None]:
!pip install pypinyin pywubi zhconv overrides boto3
!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.9.0+cu102.html
!pip install torch-geometric

Collecting pypinyin
  Downloading pypinyin-0.42.0-py2.py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 8.1 MB/s 
[?25hCollecting pywubi
  Downloading pywubi-0.0.2-py3-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 38.3 MB/s 
[?25hCollecting zhconv
  Downloading zhconv-1.4.2.tar.gz (183 kB)
[K     |████████████████████████████████| 183 kB 39.9 MB/s 
[?25hCollecting overrides
  Downloading overrides-6.1.0-py3-none-any.whl (14 kB)
Collecting boto3
  Downloading boto3-1.18.6-py3-none-any.whl (131 kB)
[K     |████████████████████████████████| 131 kB 42.6 MB/s 
[?25hCollecting typing-utils>=0.0.3
  Downloading typing_utils-0.1.0-py3-none-any.whl (10 kB)
Collecting s3transfer<0.6.0,>=0.5.0
  Downloading s3transfer-0.5.0-py3-none-any.whl (79 kB)
[K     |████████████████████████████████| 79 kB 7.4 MB/s 
[?25hCollecting botocore<1.22.0,>=1.21.6
  Downloading botocore-1.21.6-py3-none-any.whl (7.7 MB)
[K     |████████████████████████████

In [None]:
import os
import pickle
import sys
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_cluster import random_walk
from sklearn.linear_model import LogisticRegression

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv, global_max_pool, global_sort_pool
from torch_geometric.datasets import Planetoid
from torch_geometric.data import NeighborSampler as RawNeighborSampler

In [None]:
# Pick the compute device first, so the GPU-name query below is only issued
# when CUDA is actually present (it raises on a CPU-only runtime otherwise).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.__version__)
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
else:
    print('CUDA not available; running on CPU')
# Wall-clock reference point; the checkpoint-saving cell subtracts it to report
# the elapsed time.
start = time.perf_counter()

1.9.0+cu102
Tesla P100-PCIE-16GB


In [None]:
# Small epsilon constant. None of the cells visible in this notebook reference it.
EPS = 1e-15

class NeighborSampler(RawNeighborSampler):
    """Neighbor sampler that augments every mini-batch with contrastive examples.

    Each seed node is paired with one positive node (the end of a one-step
    random walk starting at the seed) and one negative node (an index drawn
    at random from ``[0, adj_t.size(1))``).
    """

    def sample(self, batch):
        """Sample neighborhoods for ``[seeds, positives, negatives]``.

        Args:
            batch: Seed node indices; converted with ``torch.tensor``.

        Returns:
            Whatever the parent sampler's ``sample`` returns for the
            concatenated index tensor (seeds first, then positives, then
            negatives, each of equal length).
        """
        seeds = torch.tensor(batch)
        src, dst, _ = self.adj_t.coo()

        # Positive example per seed: column 1 of a length-1 random walk,
        # i.e. the node reached after one step.
        walks = random_walk(src, dst, seeds, walk_length=1, coalesced=False)
        positives = walks[:, 1]

        # Negative example per seed: a random node index.
        num_candidates = self.adj_t.size(1)
        negatives = torch.randint(0, num_candidates, (seeds.numel(), ),
                                  dtype=torch.long)

        stacked = torch.cat([seeds, positives, negatives], dim=0)
        return super().sample(stacked)

class SAGE(nn.Module):
    """Two-layer GraphSAGE encoder.

    The first convolution is followed by ReLU and dropout (p=0.5); the second
    convolution's output is returned as-is.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Output size of both convolutions.
    """

    def __init__(self, in_channels, hidden_channels):
        super().__init__()
        self.gconv1 = SAGEConv(in_channels, hidden_channels)
        self.gconv2 = SAGEConv(hidden_channels, hidden_channels)

    def forward(self, x, adjs):
        """Mini-batch forward pass over sampled bipartite layers.

        Args:
            x: Features of the sampled source nodes.
            adjs: Iterable of ``(edge_index, e_id, size)`` triples, one per
                layer. The first triple goes through ``gconv1`` and every
                later one through ``gconv2``.

        Returns:
            Embeddings of the target nodes of the last layer (``x`` itself if
            ``adjs`` is empty).
        """
        for depth, (edge_index, _, size) in enumerate(adjs):
            # Target nodes are always placed first, so a prefix slice selects them.
            targets = x[:size[1]]
            if depth > 0:
                x = self.gconv2((x, targets), edge_index)
                continue
            x = self.gconv1((x, targets), edge_index)
            x = F.dropout(F.relu(x), p=0.5, training=self.training)
        return x

    def full_forward(self, x, edge_index):
        """Full-graph forward pass (no sampling): conv -> ReLU -> dropout -> conv."""
        hidden = F.relu(self.gconv1(x, edge_index))
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.gconv2(hidden, edge_index)

In [None]:
def train(data,model,train_loader,optimizer,x=None):
    """Run one pass over ``train_loader`` with the unsupervised contrastive loss.

    Args:
        data: Graph object; ``data.num_nodes`` normalises the returned loss and
            ``data.x`` supplies the node features when ``x`` is not given.
        model: Module called as ``model(features, adjs)``.
        train_loader: Iterable yielding ``(batch_size, n_id, adjs)`` where
            ``adjs`` is a list whose items support ``.to(device)``.
        optimizer: Optimizer stepping ``model``'s parameters.
        x: Optional node-feature tensor. Defaults to ``data.x`` moved to the
            model's device and cast to float.

    Returns:
        Sum over batches of ``loss * (rows per chunk)`` divided by
        ``data.num_nodes``, as a Python float.
    """
    model.train()

    # Resolve the device and the features here rather than reading notebook
    # globals, so the function works regardless of cell execution order.
    device = next(model.parameters()).device
    if x is None:
        x = data.x.to(device).float()

    total_loss = 0
    for batch_size, n_id, adjs in train_loader:
        # `adjs` holds a list of `(edge_index, e_id, size)` tuples.
        adjs = [adj.to(device) for adj in adjs]
        optimizer.zero_grad()

        out = model(x[n_id], adjs)
        # The model output is split into three equal chunks; they are treated
        # as anchor, positive and negative embeddings, in that order.
        out, pos_out, neg_out = out.split(out.size(0) // 3, dim=0)

        # Pull anchor/positive dot products up and anchor/negative ones down.
        pos_loss = F.logsigmoid((out * pos_out).sum(-1)).mean()
        neg_loss = F.logsigmoid(-(out * neg_out).sum(-1)).mean()
        loss = -pos_loss - neg_loss
        loss.backward()
        optimizer.step()

        total_loss += float(loss) * out.size(0)

    return total_loss / data.num_nodes

@torch.no_grad()
def test(data,model,x,edge_index):
    """Score the embeddings with a logistic-regression probe.

    The model is put in eval mode, full-graph embeddings are computed via
    ``model.full_forward`` and moved to the CPU, and a ``LogisticRegression``
    is fitted on the rows selected by ``data.train_mask``.

    Args:
        data: Object providing ``y``, ``train_mask``, ``val_mask`` and ``test_mask``.
        model: Module exposing ``full_forward(x, edge_index)``.
        x: Node features passed to ``full_forward``.
        edge_index: Edge index passed to ``full_forward``.

    Returns:
        ``(val_acc, test_acc)`` as returned by the probe's ``score``.
    """
    model.eval()
    embeddings = model.full_forward(x, edge_index).cpu()
    labels = data.y

    probe = LogisticRegression()
    probe.fit(embeddings[data.train_mask], labels[data.train_mask])

    return (
        probe.score(embeddings[data.val_mask], labels[data.val_mask]),
        probe.score(embeddings[data.test_mask], labels[data.test_mask]),
    )

In [None]:
def getGraphEmbeddings(graphPath,numFeat):
    """Load a pickled collection of graphs and move each to the GPU when available.

    Args:
        graphPath: Path of the pickle file. The unpickled object is iterated
            and indexed with the values it yields (e.g. a dict keyed by id).
        numFeat: Not used by this function; kept so existing calls still work.

    Returns:
        List with one entry per item of the unpickled object, each moved with
        ``.to("cuda")`` if CUDA is available, otherwise ``.to("cpu")``.
    """
    target = "cuda" if torch.cuda.is_available() else "cpu"
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file - only point this at pickles from a trusted source.
    with open(graphPath,"rb") as handle:
        loaded = pickle.load(handle)
    return [loaded[key].to(target) for key in loaded]
# Load every graph from the pickle on the mounted Drive. The second argument (6)
# is passed as numFeat, which getGraphEmbeddings accepts but does not use.
myGraphs = getGraphEmbeddings("/content/gdrive/MyDrive/Colab Data/Chinese Characters/graphsDictOrder.pickle",6)

In [None]:
# Unsupervised pre-training: one SAGE encoder (6 input channels, 300 hidden
# channels) is trained with Adam across all graphs in `myGraphs`.
model = SAGE(6,hidden_channels=300).to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)
# range(1, 2) yields a single outer epoch over the graph list.
for epoch in range(1, 2):
    for i,data in enumerate(myGraphs):
        # Features are cast to float and moved to `device`; `x` and `edge_index`
        # are rebound at notebook scope for every graph.
        x, edge_index = data.x.to(device).float(), data.edge_index.to(device)
        # A fresh sampler is built per graph. sizes=[10, 10] has one entry per
        # SAGE convolution - presumably the neighbour sample size per layer
        # (confirm against the PyG NeighborSampler docs).
        train_loader = NeighborSampler(edge_index,sizes=[10, 10],batch_size=256,shuffle=True,num_nodes=data.num_nodes)
        for miniEpoch in range(1,11):    # ten passes over the current graph; only the last loss is kept
            loss = train(data,model,train_loader,optimizer)
        # Report progress (loss of the last pass) every 100 graphs.
        if i%100==0:
            print("Step %04d/%04d: %.4f" % (i,len(myGraphs),loss))

Step 0000/9574: 8429.4375
Step 0100/9574: 1445.5065
Step 0200/9574: 2.2389
Step 0300/9574: 1.4107
Step 0400/9574: 1.3150
Step 0500/9574: 1.3640
Step 0600/9574: 1.3671
Step 0700/9574: 1.4199
Step 0800/9574: 5.3347
Step 0900/9574: 203.6454
Step 1000/9574: 1.5638
Step 1100/9574: 1.3962
Step 1200/9574: 1.3814
Step 1300/9574: 1.3632
Step 1400/9574: 1.4433
Step 1500/9574: 1.3787
Step 1600/9574: 1.7091
Step 1700/9574: 1.3993
Step 1800/9574: 1.3997
Step 1900/9574: 1.4525
Step 2000/9574: 1.3876
Step 2100/9574: 6.9155
Step 2200/9574: 1.7520
Step 2300/9574: 2.0483
Step 2400/9574: 1.3390
Step 2500/9574: 1.3878
Step 2600/9574: 9.0914
Step 2700/9574: 1.3877
Step 2800/9574: 26.9066
Step 2900/9574: 1.3533
Step 3000/9574: 1.3928
Step 3100/9574: 1.3859
Step 3200/9574: 1.2710
Step 3300/9574: 1.3875
Step 3400/9574: 1.4184
Step 3500/9574: 1.3986
Step 3600/9574: 2.4010
Step 3700/9574: 405.6048
Step 3800/9574: 1.4118
Step 3900/9574: 1.3961
Step 4000/9574: 1.3024
Step 4100/9574: 1.3866
Step 4200/9574: 1.3605


In [None]:
# Persist the trained encoder weights to Drive, then report the wall-clock time
# elapsed since `start` was recorded.
torch.save(model.state_dict(),"/content/gdrive/MyDrive/Colab Data/Chinese Characters/unsupGraphModelDict300.bin")
end = time.perf_counter()
elapsed = end-start
# Break the elapsed seconds down into whole hours, whole minutes and the
# remaining seconds.
hours, remainder = divmod(elapsed, 60*60)
mins, secs = divmod(remainder, 60)
print("Time elapsed: %02d:%02d:%02d" % (hours,mins,secs))

Time elapsed: 00:07:14


In [None]:
class MyGCN(nn.Module):
    """Two-layer graph encoder followed by a pooled, sigmoid-activated head.

    Args:
        layer: ``"SAGE"`` or ``"GCN"``; selects the convolution type used for
            ``gconv1`` and ``gconv2``.
        num_features: Size of each input node feature vector.
        hidden: Output size of both graph convolutions and of ``linear1``.
        output_features: Size of the final output vector.
        gcn_drop: Dropout probability used after ``gconv1`` and before ``linear2``.
        k: Value passed to ``global_sort_pool``; also fixes ``linear1``'s input
            size as ``32 * (k - 5 + 1)``.
        pool: ``"sort"`` or ``"max"``; selects the readout in ``forward``.

    Raises:
        ValueError: If ``layer`` is neither ``"SAGE"`` nor ``"GCN"``.
    """

    def __init__(self,layer,num_features,hidden,output_features,gcn_drop,k,pool):
        super(MyGCN, self).__init__()
        # Fail at construction: an unknown layer would otherwise leave the
        # model without gconv1/gconv2 and only blow up inside forward().
        if layer not in ("SAGE", "GCN"):
            raise ValueError("Unsupported layer %r; expected 'SAGE' or 'GCN'" % (layer,))

        self.gcn_drop = gcn_drop
        self.k = k
        self.pool = pool

        if layer=="SAGE":
            self.gconv1 = SAGEConv(num_features,hidden)
            self.gconv2 = SAGEConv(hidden, hidden)
        else:
            self.gconv1 = GCNConv(num_features,hidden)
            self.gconv2 = GCNConv(hidden,hidden)

        # Conv1d with kernel size 5 over the k pooled positions yields
        # k - 5 + 1 positions with 32 channels each, hence linear1's input size.
        self.conv1d = nn.Conv1d(hidden, 32, 5)
        self.linear1 = nn.Linear(32 * (self.k - 5 + 1), hidden)
        self.linear2 = nn.Linear(hidden, output_features)

    def forward(self, data):
        """Encode ``data`` and apply the configured readout.

        Args:
            data: Object exposing ``x``, ``edge_index`` and ``batch``.

        Returns:
            Sigmoid outputs of ``linear2`` when ``pool`` is ``"sort"`` or
            ``"max"``. For any other ``pool`` value the ReLU-activated node
            embeddings are returned unpooled.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch

        x = self.gconv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=self.gcn_drop, training=self.training)
        x = self.gconv2(x, edge_index)
        x = F.relu(x)

        if self.pool=="sort":
            x = global_sort_pool(x, batch, self.k)
            # Reshape to (rows, hidden, k) so Conv1d slides along the k axis.
            x = x.view(len(x), self.k, -1).permute(0, 2, 1)
            x = F.relu(self.conv1d(x))
            x = x.view(len(x), -1)
            x = F.relu(self.linear1(x))
        elif self.pool=="max":
            x = global_max_pool(x, batch)
        else:
            # Unrecognised pool: no readout is applied.
            return x

        # Shared head for both readouts.
        x = F.dropout(x, p=self.gcn_drop, training=self.training)
        x = self.linear2(x)
        # torch.sigmoid replaces the deprecated F.sigmoid; the result is the same.
        return torch.sigmoid(x)

In [None]:
# Build the downstream model and warm-start its two graph convolutions from the
# unsupervised SAGE checkpoint saved earlier in this notebook.
gcn = MyGCN("SAGE",6,300,24,0.5,30,"sort")
# map_location="cpu": the checkpoint was written from a model placed on `device`
# (CUDA when available) while `gcn` lives on the CPU, so loading must not
# require a GPU.
pretrained_state = torch.load("/content/gdrive/MyDrive/Colab Data/Chinese Characters/unsupGraphModelDict300.bin", map_location="cpu")
# strict=False because the SAGE checkpoint only carries gconv1/gconv2; conv1d,
# linear1 and linear2 keep their fresh initialisation.
load_result = gcn.load_state_dict(pretrained_state,strict=False)
# strict=False also hides checkpoint keys that match nothing in MyGCN; surface
# those instead of silently ignoring pretrained weights.
assert not load_result.unexpected_keys, load_result.unexpected_keys
gcn.eval()

MyGCN(
  (gconv1): SAGEConv(6, 300)
  (gconv2): SAGEConv(300, 300)
  (conv1d): Conv1d(300, 32, kernel_size=(5,), stride=(1,))
  (linear1): Linear(in_features=832, out_features=300, bias=True)
  (linear2): Linear(in_features=300, out_features=24, bias=True)
)