In [1]:
import torch
!pip install torch-geometric
!pip install pymetis
!pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-1.11.0+cu113.html

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch-geometric
  Downloading torch_geometric-2.0.4.tar.gz (407 kB)
[K     |████████████████████████████████| 407 kB 10.7 MB/s 
Building wheels for collected packages: torch-geometric
  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone
  Created wheel for torch-geometric: filename=torch_geometric-2.0.4-py3-none-any.whl size=616603 sha256=58a8f7f4bcdbfcc363cd3110db77799a9796bbfa9384c58559f06b571156687d
  Stored in directory: /root/.cache/pip/wheels/18/a6/a4/ca18c3051fcead866fe7b85700ee2240d883562a1bc70ce421
Successfully built torch-geometric
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.0.4
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pymetis
  Downloading PyMetis-2020.1.tar.gz (297 kB)
[K     |████████████████████████████████| 297 kB 14.3 MB/s 
Building 

In [2]:
torch.__version__

'1.11.0+cu113'

In [3]:
# Mount Google Drive into the Colab VM (remounting if it is already mounted).
from google.colab import drive
drive.mount('/content/drive',force_remount=True)
# Work from the project folder so that relative paths used later resolve against it.
# NOTE(review): presumably this folder also holds the local `graph_conv` module imported below - confirm.
%cd  drive/MyDrive/MixupForGraph-main/MixupForGraph-main/

Mounted at /content/drive
/content/drive/MyDrive/MixupForGraph-main/MixupForGraph-main


# Clustering-based approach

In [5]:
import pymetis
from sklearn.cluster import KMeans
def basic_aug(graph, num_nodes):
    """Pad labels and split masks for `num_nodes` freshly appended nodes.

    The new nodes receive label 0 and are excluded from the train/val/test
    masks. Padding is created with `new_zeros`, so every attribute keeps its
    own dtype and device instead of being promoted to float32 on the CPU.

    Args:
        graph: object exposing `y`, `train_mask`, `val_mask` and `test_mask`
            tensors whose first dimension indexes nodes.
        num_nodes: number of nodes that were appended to the graph.

    Returns:
        The same `graph` object, modified in place.
    """
    graph.y = torch.cat((graph.y, graph.y.new_zeros(num_nodes)), dim=0)
    for mask_name in ('train_mask', 'val_mask', 'test_mask'):
        mask = getattr(graph, mask_name)
        # new_zeros on a bool mask yields False entries.
        setattr(graph, mask_name, torch.cat((mask, mask.new_zeros(num_nodes)), dim=0))
    return graph

def single_aug(graph):
    """Append one hub node connected (in both directions) to every existing node.

    The hub's feature vector is the mean of all node features; its label and
    mask entries are filled in by `basic_aug`.

    Edge endpoints are built as integer tensors on the device of
    `graph.edge_index`, so node ids are never routed through float32.

    Args:
        graph: object exposing `x` (node features, one row per node) and
            `edge_index` (2 x num_edges) plus the attributes `basic_aug` needs.

    Returns:
        The same `graph` object, modified in place.
    """
    num_nodes = graph.x.shape[0]
    originals = torch.arange(num_nodes, dtype=torch.long, device=graph.edge_index.device)
    hub = torch.full_like(originals, num_nodes)  # id the new node will get

    # First half: hub -> node, second half: node -> hub.
    new_edges = torch.stack(
        (torch.cat((hub, originals)), torch.cat((originals, hub))), dim=0
    )
    graph.edge_index = torch.cat((graph.edge_index, new_edges), dim=1)
    graph.x = torch.cat((graph.x, graph.x.mean(dim=0, keepdim=True)), dim=0)
    graph = basic_aug(graph, 1)
    return graph

def cluster_aug(graph, algo, clusters=2):
    """Append one virtual node per cluster, linked to every member of that cluster.

    Nodes are partitioned either by METIS on the graph structure
    (`algo == 'metis'`) or by k-means on the node features
    (`algo == 'kmeans'`). Each virtual node gets the mean feature vector of
    its members (a zero vector if the cluster is empty, to avoid NaN
    features). Edges are finally re-ordered by source node.

    Args:
        graph: object exposing `x` and `edge_index` plus the attributes
            `basic_aug` needs.
        algo: 'metis' or 'kmeans'.
        clusters: number of clusters / virtual nodes to create.

    Returns:
        The same `graph` object, modified in place.

    Raises:
        ValueError: if `algo` is not one of the supported names.
    """
    x = graph.x
    num_nodes = x.shape[0]

    if algo == 'metis':
        src = graph.edge_index[0].long()
        dst = graph.edge_index[1].long()
        # pymetis is given plain Python lists of neighbour ids.
        adj_list = [dst[src == idx].tolist() for idx in range(num_nodes)]
        _, membership = pymetis.part_graph(clusters, adjacency=adj_list)
    elif algo == 'kmeans':
        kmeans = KMeans(n_clusters=clusters, random_state=0).fit(x.detach().cpu().numpy())
        membership = kmeans.labels_
    else:
        raise ValueError(f"unknown clustering algorithm: {algo!r}")

    membership = torch.as_tensor(membership, dtype=torch.long)
    edge_device = graph.edge_index.device

    edge_chunks = [graph.edge_index]
    centroids = []
    for cluster in range(clusters):
        idxs = (membership == cluster).nonzero(as_tuple=True)[0]
        hub = torch.full_like(idxs, num_nodes + cluster)  # id of this cluster's virtual node
        # First half: hub -> member, second half: member -> hub.
        new_edges = torch.stack((torch.cat((hub, idxs)), torch.cat((idxs, hub))), dim=0)
        edge_chunks.append(new_edges.to(edge_device))

        if idxs.numel() > 0:
            centroids.append(x[idxs.to(x.device)].mean(dim=0, keepdim=True))
        else:
            # Mean over zero rows would be NaN and poison message passing.
            centroids.append(x.new_zeros((1, x.shape[1])))

    graph.edge_index = torch.cat(edge_chunks, dim=1)
    graph.x = torch.cat([x] + centroids, dim=0)

    _, indices = torch.sort(graph.edge_index[0, :])
    graph.edge_index = graph.edge_index[:, indices]
    graph = basic_aug(graph, clusters)
    return graph

def scluster_aug(graph, algo, clusters=2):
    """Run `cluster_aug`, then add a super node linked to all cluster nodes.

    The super node's feature vector is the mean of the cluster nodes'
    features. Indices are built with `torch.arange` / `torch.full_like`
    rather than the legacy `torch.Tensor(...)` constructor, whose handling of
    integer tensors is version-dependent.

    Args:
        graph: object exposing `x` and `edge_index` plus the attributes
            `basic_aug` needs.
        algo: clustering algorithm name forwarded to `cluster_aug`.
        clusters: number of clusters forwarded to `cluster_aug`.

    Returns:
        The same `graph` object, modified in place.
    """
    old_cluster_idx = graph.x.shape[0]
    graph = cluster_aug(graph, algo, clusters)
    new_cluster_idx = graph.x.shape[0]  # id the super node will get

    # Ids of the virtual nodes that cluster_aug just appended.
    idxs = torch.arange(
        old_cluster_idx, new_cluster_idx, dtype=torch.long, device=graph.edge_index.device
    )
    hub = torch.full_like(idxs, new_cluster_idx)

    # First half: super node -> cluster node, second half: the reverse.
    new_edges = torch.stack((torch.cat((hub, idxs)), torch.cat((idxs, hub))), dim=0)

    graph.edge_index = torch.cat((graph.edge_index, new_edges), dim=1)
    super_feature = graph.x[old_cluster_idx:new_cluster_idx].mean(dim=0, keepdim=True)
    graph.x = torch.cat((graph.x, super_feature), dim=0)

    graph = basic_aug(graph, 1)
    return graph

def _partition_nodes(x, edge_index, node_ids, n_parts, algo):
    """Assign each node in `node_ids` to one of at most `n_parts` groups.

    Args:
        x: CPU feature matrix of the whole graph.
        edge_index: CPU long tensor (2 x num_edges) of the whole graph.
        node_ids: CPU long tensor with the ids of the nodes to partition.
        n_parts: requested number of groups (capped at the number of nodes).
        algo: 'metis' (structure of the induced subgraph) or 'kmeans' (features).

    Returns:
        Long tensor with one group id per entry of `node_ids`.
    """
    if algo not in ('metis', 'kmeans'):
        raise ValueError(f"unknown clustering algorithm: {algo!r}")

    n_nodes = node_ids.numel()
    n_parts = min(n_parts, n_nodes)
    if n_parts <= 1:
        # Nothing to split: everything (possibly nothing) lands in group 0.
        return torch.zeros(n_nodes, dtype=torch.long)

    if algo == 'metis':
        # Relabel the selected nodes 0..n_nodes-1 and keep only edges among them.
        local_id = torch.full((x.shape[0],), -1, dtype=torch.long)
        local_id[node_ids] = torch.arange(n_nodes)
        local_src, local_dst = local_id[edge_index[0]], local_id[edge_index[1]]
        inside = (local_src >= 0) & (local_dst >= 0)
        local_src, local_dst = local_src[inside], local_dst[inside]
        adjacency = [local_dst[local_src == i].tolist() for i in range(n_nodes)]
        _, membership = pymetis.part_graph(n_parts, adjacency=adjacency)
    else:
        membership = KMeans(n_clusters=n_parts, random_state=0).fit(x[node_ids].numpy()).labels_

    return torch.as_tensor(membership, dtype=torch.long)


def hcluster_aug(graph, algo, clusters=(5,5)):
    """Add a two-level hierarchy of virtual nodes on top of the graph.

    The nodes are split into `clusters[0]` top-level clusters, and each of
    those into `clusters[1]` sub-clusters. For every top-level cluster `c`
    the new node ids are laid out as one block of `clusters[1] + 1` ids:
    the first is the cluster node, the following ones its sub-cluster nodes.
    Sub-cluster nodes are linked (both directions) to their member nodes and
    carry the members' mean feature; cluster nodes are linked to their
    non-empty sub-cluster nodes and carry the mean of those nodes' features.
    Empty (sub-)clusters keep a zero feature vector and no edges.

    The previous body referenced arrays (`adj`, `features`, `labels`, ...)
    that are not defined anywhere in this notebook and passed the `clusters`
    tuple directly to the clustering routines, so it could not run.

    Args:
        graph: object exposing `x` and `edge_index` plus the attributes
            `basic_aug` needs.
        algo: 'metis' or 'kmeans'.
        clusters: `(top_level, per_cluster)` counts; a single int `k` is
            treated as `(k, k)`.

    Returns:
        The same `graph` object, modified in place.

    Raises:
        ValueError: if `algo` is not one of the supported names.
    """
    if isinstance(clusters, int):
        clusters = (clusters, clusters)
    n_top, n_sub = clusters
    block = n_sub + 1                    # ids reserved per top-level cluster
    total_clusters = n_top * block

    num_nodes = graph.x.shape[0]
    x = graph.x.detach().cpu()
    edge_index = graph.edge_index.to(torch.long).cpu()

    top_membership = _partition_nodes(x, edge_index, torch.arange(num_nodes), n_top, algo)

    new_x = x.new_zeros((total_clusters, x.shape[1]))
    sources, targets = [], []

    for cluster in range(n_top):
        member_ids = (top_membership == cluster).nonzero(as_tuple=True)[0]
        curr_cluster = num_nodes + cluster * block
        sub_membership = _partition_nodes(x, edge_index, member_ids, n_sub, algo)

        curr_members = []
        for hcluster in range(n_sub):
            hidxs = member_ids[sub_membership == hcluster]
            if hidxs.numel() == 0:
                continue
            curr_hcluster = curr_cluster + hcluster + 1
            curr_members.append(curr_hcluster)
            new_x[curr_hcluster - num_nodes] = x[hidxs].mean(dim=0)
            sources.append(torch.full_like(hidxs, curr_hcluster))
            targets.append(hidxs)

        if curr_members:
            curr_members = torch.tensor(curr_members, dtype=torch.long)
            new_x[curr_cluster - num_nodes] = new_x[curr_members - num_nodes].mean(dim=0)
            sources.append(torch.full_like(curr_members, curr_cluster))
            targets.append(curr_members)

    if sources:
        src = torch.cat(sources)
        dst = torch.cat(targets)
        # Add every virtual edge in both directions.
        new_edges = torch.stack((torch.cat((src, dst)), torch.cat((dst, src))), dim=0)
        graph.edge_index = torch.cat(
            (graph.edge_index, new_edges.to(graph.edge_index.device)), dim=1
        )

    graph.x = torch.cat(
        (graph.x, new_x.to(device=graph.x.device, dtype=graph.x.dtype)), dim=0
    )
    graph = basic_aug(graph, total_clusters)
    return graph

def augment_graph(graph, aug_type='single', clusters=2):
    """Apply the requested augmentation and normalise attribute dtypes.

    Supported `aug_type` values: 'single', 'metis', 'kmeans', 'smetis',
    'skmeans', 'hmetis', 'hkmeans'. For the 's'/'h' prefixed variants the
    prefix is stripped before the algorithm name is forwarded. Any other
    value leaves the graph structure untouched.

    Regardless of the branch taken, `edge_index` and `y` are cast to long
    and the three split masks to bool before the graph is returned.
    """
    if aug_type == 'single':
        graph = single_aug(graph)
    elif aug_type in ('metis', 'kmeans'):
        graph = cluster_aug(graph, aug_type, clusters)
    elif aug_type in ('smetis', 'skmeans'):
        graph = scluster_aug(graph, aug_type[1:], clusters)
    elif aug_type in ('hmetis', 'hkmeans'):
        graph = hcluster_aug(graph, aug_type[1:], clusters)

    target_dtypes = (
        ('edge_index', torch.long),
        ('y', torch.long),
        ('train_mask', torch.bool),
        ('val_mask', torch.bool),
        ('test_mask', torch.bool),
    )
    for attr_name, dtype in target_dtypes:
        setattr(graph, attr_name, getattr(graph, attr_name).to(dtype))

    return graph

# Mixup

In [6]:
import os.path as osp
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import argparse
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid, Coauthor
from torch_geometric.datasets import CoraFull 
from torch_geometric.data import Data
from graph_conv import GraphConv
from torch_geometric.utils import degree
from torch_sparse import SparseTensor
import torch_geometric.transforms as T
# from torch_geometric.nn from GCNConv

import pdb
import numpy as np
import random
import copy
import argparse

# Command-line parsing is disabled in the notebook; the switch is set by hand below.
#parser = argparse.ArgumentParser('Mixup')
#parser.add_argument('--mixup', action='store_true', help='Whether to have Mixup')
#args = parser.parse_args()


# Mixup switch: train() samples a random mixing coefficient only when this is
# exactly the string 'Mixup'; any other value makes it use lam = 1.0.
args = 'Mixup'

def idNode(data, id_new_value_old):
    """Return a relabelled copy of `data` in which node `i` is old node `id_new_value_old[i]`.

    The copy drops the feature matrix and the id arrays, hides validation and
    test labels (set to -1), permutes `y` and rewrites `edge_index` so that
    every endpoint uses the new node ids. The input object is not modified.

    The old->new lookup table is allocated on the device of `edge_index`, so
    the relabelling also works when the graph does not live on the CPU.

    Args:
        data: graph object with `x`, `y`, `edge_index`, `train_id`, `val_id`
            and `test_id` attributes.
        id_new_value_old: permutation of node ids (new id -> old id).

    Returns:
        The relabelled deep copy.
    """
    data = copy.deepcopy(data)
    data.x = None
    # Labels outside the training set must not leak into the mixed loss.
    data.y[data.val_id] = -1
    data.y[data.test_id] = -1

    perm = torch.as_tensor(id_new_value_old, dtype=torch.long)
    data.y = data.y[perm.to(data.y.device)]

    data.train_id = None
    data.test_id = None
    data.val_id = None

    # Invert the permutation: id_old_value_new[old id] == new id.
    device = data.edge_index.device
    perm = perm.to(device)
    num_ids = perm.shape[0]
    id_old_value_new = torch.zeros(num_ids, dtype=torch.long, device=device)
    id_old_value_new[perm] = torch.arange(0, num_ids, dtype=torch.long, device=device)

    row = id_old_value_new[data.edge_index[0]]
    col = id_old_value_new[data.edge_index[1]]
    data.edge_index = torch.stack([row, col], dim=0)

    return data

def shuffleData(data):
    """Build the Mixup partner graph by randomly permuting the training nodes.

    Only the ids listed in `data.train_id` are shuffled among themselves; all
    other nodes keep their id. The relabelled graph is produced by `idNode`,
    which works on its own deep copy, so `data` is only read here and no
    additional copy of the whole graph is made.

    Args:
        data: graph object with `num_nodes` and `train_id` plus the
            attributes `idNode` needs.

    Returns:
        Tuple `(shuffled_graph, id_new_value_old)` where `id_new_value_old`
        is the numpy permutation (new id -> old id) that was applied.
    """
    id_new_value_old = np.arange(data.num_nodes)
    # Shuffle a private copy so data.train_id itself stays in order.
    train_id_shuffle = copy.deepcopy(data.train_id)
    np.random.shuffle(train_id_shuffle)
    id_new_value_old[data.train_id] = train_id_shuffle
    data = idNode(data, id_new_value_old)

    return data, id_new_value_old


class Net(torch.nn.Module):
    """GraphConv network that mixes the hidden states of paired nodes (graph Mixup).

    A first pass computes per-layer representations on the original graph.
    A second pass re-runs `conv1` and `conv2` on both the original graph and
    its shuffled partner and blends the two results with weight `lam`.

    Only `conv1` and `conv2` influence the returned value: `conv3` is
    evaluated but its result is discarded, and `conv4` / `lin` are never
    called in `forward`.
    """

    def __init__(self, hidden_channels, in_channel, out_channel):
        """Create the layers.

        Args:
            hidden_channels: width of the GraphConv layers.
            in_channel: number of input features per node.
            out_channel: size of the (currently unused) linear head's output.
        """
        super(Net, self).__init__()
        self.conv1 = GraphConv(in_channel, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.conv4 = GraphConv(hidden_channels, 32)
        self.lin = torch.nn.Linear(1 * 32, out_channel)
        # self.conv1 = GCNConv(in_channel, hidden_channels)
        # self.conv2 = GCNConv(hidden_channels, out_channel)

    # data.x, data.edge_index, data_b.edge_index, lam, id_new_value_old

    def forward(self, x0, edge_index, edge_index_b, lam, id_new_value_old):
        """Return log-probabilities computed from the mixed second-layer state.

        Args:
            x0: node feature matrix.
            edge_index: edges of the original graph.
            edge_index_b: edges of the shuffled partner graph.
            lam: mixing weight given to the original graph (1 disables mixing).
            id_new_value_old: index (new id -> old id) used to gather the
                partner's representations.

        Returns:
            `log_softmax` over the last dimension of the mixed `conv2` output.
            NOTE(review): because `self.lin` is not applied, the output width
            is that of `conv2` (presumably `hidden_channels`), not
            `out_channel` - confirm this is intended.
        """
        # NOTE(review): the third GraphConv argument is presumably the
        # centre/root-node features - confirm in graph_conv.py.

        # ---- pass 1: plain representations on the original graph ----
        x0 = F.dropout(x0, p=0.5, training=self.training)
        x1 = self.conv1(x0, edge_index, x0)
        x1 = F.relu(x1)
        x1 = F.dropout(x1, p=0.5, training=self.training)

        x2 = self.conv2(x1, edge_index, x1)
        x2 = F.relu(x2)
        x2 = F.dropout(x2, p=0.5, training=self.training)

        x3 = self.conv3(x2, edge_index, x2)
        x3 = F.relu(x3)
        # Dropout belongs on x3 here (it was applied to x2 by mistake).
        # x3 does not feed the returned value, so the output is unaffected.
        x3 = F.dropout(x3, p=0.5, training=self.training)

        # Representations re-ordered to follow the partner graph's node ids.
        x0_b = x0[id_new_value_old]
        x1_b = x1[id_new_value_old]
        x2_b = x2[id_new_value_old]
        x3_b = x3[id_new_value_old]

        # ---- pass 2: mixed representations ----
        x0_mix = x0 * lam + x0_b * (1 - lam)

        new_x1 = self.conv1(x0, edge_index, x0_mix)
        new_x1_b = self.conv1(x0_b, edge_index_b, x0_mix)
        new_x1 = F.relu(new_x1)
        new_x1_b = F.relu(new_x1_b)

        x1_mix = new_x1 * lam + new_x1_b * (1 - lam)
        x1_mix = F.dropout(x1_mix, p=0.4, training=self.training)
        new_x2 = self.conv2(x1, edge_index, x1_mix)
        new_x2_b = self.conv2(x1_b, edge_index_b, x1_mix)
        new_x2 = F.relu(new_x2)
        new_x2_b = F.relu(new_x2_b)

        x2_mix = new_x2 * lam + new_x2_b * (1 - lam)
        x2_mix = F.dropout(x2_mix, p=0.4, training=self.training)

        x = x2_mix
        #x = self.lin(x)
        return F.log_softmax(x, dim=-1)

# Seed every random number generator used in this notebook with the same value.
SEED = 0
random.seed(SEED)     # Python's built-in generator
np.random.seed(SEED)  # NumPy's global generator
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)


'''
Dataset choices are citeseer, cora or pubmed
'''


# Name of the Planetoid dataset to load.
dataset = 'cora'
# "__file__" is a string literal here, not the module variable, so realpath()
# resolves it against the current working directory; the data is therefore
# stored under <cwd>/../data/<dataset>.
path = osp.join(osp.dirname(osp.realpath("__file__")), '..', 'data', dataset)
# From here on `dataset` is the loaded dataset object, no longer the name string.
dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
# First (index 0) graph of the dataset.
data = dataset[0]

''' 
Uncomment any of the below lines if augmentation is your choice. Else run the code block 
to run without any augmentation. Additionally the args = Mixup from above also decides
whether mixup should be applied. Baseline can be run by setting args != Mixup and leaving the
below lines commented.
'''
#data = augment_graph(data, 'single')
#data = augment_graph(data, 'kmeans', 20)
#data = augment_graph(data, 'metis', 20)

# Split data into train / validation / test node ids.
node_id = np.arange(data.num_nodes)
print("Before ", node_id)
# Shuffling is disabled, so the "Before" and "After" printouts are identical.
#np.random.shuffle(node_id)
print("After ",node_id)



# Alternative fixed index split for Cora, kept inside a string literal so it is not executed.
# To use it, remove the triple quotes and comment out the mask-based split further down.
'''
data.train_id = node_id[:140]
data.val_id = node_id[140:640]
data.test_id = node_id[1708:2708]
'''


# Active split: node ids are read from the boolean masks stored on the graph.
# The original note says this is meant for Citeseer and Pubmed, with the index
# split above intended for Cora; it is nevertheless what runs for 'cora' here.
data.train_id = data.train_mask.argwhere().reshape(-1).cpu().numpy()
data.val_id = data.val_mask.argwhere().reshape(-1).cpu().numpy()
data.test_id = data.test_mask.argwhere().reshape(-1).cpu().numpy()



print(f"Number of features is {dataset.num_node_features} and number if classes is {dataset.num_classes} and nodes is {data.x.shape} and node is {data.num_nodes} ")

# Define the model and optimizer; use the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(hidden_channels=32, in_channel = dataset.num_node_features, out_channel = dataset.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0006)


# func train one epoch
def train(data):
    """Run a single optimisation step and return the scalar training loss.

    A shuffled partner graph is built on every call. The loss is the
    `lam`-weighted sum of the NLL against the original labels and the NLL
    against the partner's labels, both evaluated on `data.train_id`.
    """
    model.train()

    # Sample the mixing weight only when Mixup is switched on; 1.0 means "no mixing".
    lam = np.random.beta(4.0, 4.0) if args == 'Mixup' else 1.0

    # Partner graph plus the permutation that produced it.
    data_b, id_new_value_old = shuffleData(data)
    data = data.to(device)
    data_b = data_b.to(device)

    optimizer.zero_grad()

    out = model(data.x, data.edge_index, data_b.edge_index, lam, id_new_value_old)

    train_ids = data.train_id
    loss_original = F.nll_loss(out[train_ids], data.y[train_ids])
    loss_partner = F.nll_loss(out[train_ids], data_b.y[train_ids])
    loss = loss_original * lam + loss_partner * (1 - lam)

    loss.backward()
    optimizer.step()

    return loss.item()


# test
@torch.no_grad()
def test(data):
    """Evaluate the model without mixing and return [train, val, test] accuracy.

    The graph is paired with itself (identity permutation, `lam` = 1), so the
    forward pass reduces to a plain prediction.
    """
    model.eval()

    features = data.x.to(device)
    edges = data.edge_index.to(device)
    identity = np.arange(data.num_nodes)

    out = model(features, edges, edges, 1, identity)
    correct = out.argmax(dim=-1).eq(data.y.to(device))

    # One accuracy per id set, in the order the sets are requested.
    return [
        correct[id_].sum().item() / id_.shape[0]
        for _, id_ in data('train_id', 'val_id', 'test_id')
    ]


# Bookkeeping placeholders; nothing in this cell updates them.
best_acc = 0
accord_epoch = 0
accord_train_acc = 0
accord_train_loss = 0

# Train for a fixed number of epochs. Accuracy is only reported every 50th
# epoch, so the (gradient-free) evaluation pass is run only on those epochs
# instead of after every single training step.
for epoch in range(1, 5001):
    loss = train(data)
    if epoch % 50 == 0:
        accs = test(data)
        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train Acc: {accs[0]:.4f}, Test Acc: {accs[2]:.4f}')

Before  [   0    1    2 ... 2705 2706 2707]
After  [   0    1    2 ... 2705 2706 2707]
Number of features is 1433 and number if classes is 7 and nodes is torch.Size([2708, 1433]) and node is 2708 
Epoch: 50, Loss: 3.3791, Train Acc: 0.1429, Test Acc: 0.1030
Epoch: 100, Loss: 3.2001, Train Acc: 0.1429, Test Acc: 0.1030
Epoch: 150, Loss: 3.0255, Train Acc: 0.5214, Test Acc: 0.3270
Epoch: 200, Loss: 2.8978, Train Acc: 0.7143, Test Acc: 0.6240
Epoch: 250, Loss: 2.6351, Train Acc: 0.8714, Test Acc: 0.6650
Epoch: 300, Loss: 2.7859, Train Acc: 0.9714, Test Acc: 0.7250
Epoch: 350, Loss: 2.5957, Train Acc: 0.9857, Test Acc: 0.7750
Epoch: 400, Loss: 2.2130, Train Acc: 0.9929, Test Acc: 0.7820
Epoch: 450, Loss: 2.5732, Train Acc: 0.9929, Test Acc: 0.7810
Epoch: 500, Loss: 2.3473, Train Acc: 0.9929, Test Acc: 0.7840
Epoch: 550, Loss: 2.1376, Train Acc: 0.9929, Test Acc: 0.7920
Epoch: 600, Loss: 2.1529, Train Acc: 0.9929, Test Acc: 0.7840
Epoch: 650, Loss: 2.2686, Train Acc: 0.9929, Test Acc: 0.787

KeyboardInterrupt: ignored