In [144]:
# interactive reimport
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [236]:
import sys, logging, time
import os.path as osp
import torch
from model import Model
from torch_geometric.loader import NeighborLoader
import numpy as np

In [237]:
import argparse
parser = argparse.ArgumentParser(description = 'pytorch version of GraphSAGE')
parser.add_argument('--data', type = str, default = 'cora')
# parser.add_argument('--aggr_func', type = str, default = 'MEAN') # dead argmument
parser.add_argument('--num_epochs', type = int, default = 10)
parser.add_argument('--batch_size', type = int, default = 128)
parser.add_argument('--seed', type = int, default = 13)
parser.add_argument('--cuda', action = 'store_true', help = 'use CUDA')
parser.add_argument('--num_neg_samples', type = int, default = 10) # dead argument
parser.add_argument('--lr', type = float, default = 0.1)
args = parser.parse_args(args=['--cuda'])

In [238]:
args

Namespace(data='cora', num_epochs=10, batch_size=128, seed=13, cuda=True, num_neg_samples=10, lr=0.1)

In [239]:
np.random.seed(args.seed)
torch.manual_seed(args.seed)

logging.basicConfig(level = logging.INFO, format = '%(asctime)s - %(levelname)s - %(mesmodel)s')
args.device = torch.device("cuda" if args.cuda else "cpu")
logging.info('Device:' + str(args.device))

2023-01-14 22:26:17,169 - INFO - Device:cuda


In [240]:
data_name = 'cora' 
attributes_file_name = osp.join('../data', data_name, 'attributes')
labels_file_name = osp.join('../data', data_name, 'labels')
valid_file_name = osp.join('../data', data_name, 'valid_nodes')

features = np.loadtxt(attributes_file_name, dtype=np.float32)
labels = np.loadtxt(labels_file_name, dtype=np.int64)[:,1]
valid_all_nodes_list = np.loadtxt(valid_file_name, dtype = np.int64)

In [241]:
from data_handler import update_viewed_all_nodes_and_edges, generate_whole_graph
def load_graph(t=14):
	stream_edges_dir_name = osp.join('../data', data_name, 'stream_edges')
	viewed_all_nodes, viewed_all_edges = None, None
	for tt in range(t):
		coming_edges = np.loadtxt(osp.join(stream_edges_dir_name, str(tt)), dtype=int)
		viewed_all_nodes, viewed_all_edges = update_viewed_all_nodes_and_edges(
								coming_edges, viewed_all_nodes, viewed_all_edges) 
		graph, valid_nodes = generate_whole_graph(viewed_all_nodes, viewed_all_edges, valid_all_nodes_list, features, labels)
	return graph, valid_nodes

graph, valid_nodes = load_graph()

In [242]:
# Model parameter
input_dim = graph.x.shape[1] # 1433
hidden_dim = 64
output_dim = len(np.unique(graph.y)) # 7
num_layers = 3

In [243]:
# Model definition
model = Model(in_channels=input_dim, hidden_channels=hidden_dim, out_channels=output_dim, num_layers=num_layers).to(args.device)
print(model)
# Model optimizer, may change into adam
optimizer = torch.optim.SGD(model.parameters(), lr = args.lr)

Model(
  (sage): GraphSAGE(1433, 64, num_layers=2)
  (lin): Linear(in_features=64, out_features=7, bias=True)
)


In [244]:
train_mask = np.ones(len(graph.x), dtype=int)
test_mask = np.zeros(len(graph.x), dtype=int)
test_mask[valid_nodes] = 1
train_mask -= test_mask

In [245]:
train_mask.sum(), test_mask.sum()

(1908, 800)

In [246]:
graph.train_mask = train_mask.astype(bool)
graph.test_mask = test_mask.astype(bool)

In [247]:
graph = graph.to(args.device, 'x', 'y')

In [248]:
from torch_geometric.loader import NeighborLoader

In [249]:
train_loader = NeighborLoader(
	graph, 
	num_neighbors=[args.num_neg_samples] * (num_layers - 1),
	input_nodes=graph.train_mask,
	shuffle=True,
	batch_size=args.batch_size)

In [250]:
import copy
valid_loader = NeighborLoader(
	copy.copy(graph),
	input_nodes = None,
	num_neighbors=[-1],
	shuffle = False,
	batch_size = args.batch_size
)

In [251]:
sampled_data = next(iter(train_loader))
print(sampled_data)

Data(x=[157, 1433], edge_index=[2, 336], y=[157], num_nodes=157, train_mask=[157], test_mask=[157], input_id=[128], batch_size=128)


In [252]:
sampled_data.test_mask

tensor([False, False, False,  True,  True, False,  True, False, False,  True,
        False, False, False,  True, False, False, False,  True,  True,  True,
        False, False,  True,  True, False,  True,  True, False, False,  True,
         True, False, False,  True, False,  True, False, False,  True,  True,
        False, False, False, False, False, False, False,  True, False,  True,
        False, False,  True, False, False,  True, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False,
        False, False,  True, False, False, False, False,  True,  True,  True,
        False, False, False, False, False, False, False, False,  True, False,
        False,  True, False, False,  True,  True,  True, False, False, False,
         True,  True, False, False, False, False, False,  True, False,  True,
         True, False,  True, False, False, False, False, False, False,  True,
         True, False, False, False,  True, False,  True, False, 

In [253]:
sampled_data.train_mask

tensor([ True,  True,  True, False, False,  True, False,  True,  True, False,
         True,  True,  True, False,  True,  True,  True, False, False, False,
         True,  True, False, False,  True, False, False,  True,  True, False,
        False,  True,  True, False,  True, False,  True,  True, False, False,
         True,  True,  True,  True,  True,  True,  True, False,  True, False,
         True,  True, False,  True,  True, False,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True, False,  True,  True,  True,  True, False, False, False,
         True,  True,  True,  True,  True,  True,  True,  True, False,  True,
         True, False,  True,  True, False, False, False,  True,  True,  True,
        False, False,  True,  True,  True,  True,  True, False,  True, False,
        False,  True, False,  True,  True,  True,  True,  True,  True, False,
        False,  True,  True,  True, False,  True, False,  True, 

In [254]:
sampled_data.x.dtype, sampled_data.y.dtype 

(torch.float32, torch.int64)

In [255]:
next(iter(model.parameters())).dtype

torch.float32

In [256]:
# Model training
from tqdm import tqdm
def train(epoch):
	model.train()
	pbar = tqdm(total=int(len(train_loader.dataset)))
	pbar.set_description(f'Epoch{ epoch:02d}')
	total_loss = 0
	total_correct = 0
	total_examples = 0
	for batch in train_loader:
		batch = batch.to(args.device)
		optimizer.zero_grad()
		y_pred = model.forward(batch)[:batch.batch_size]
		y_true = batch.y[:batch.batch_size]
		total_correct += int((y_pred.argmax(dim=-1) == y_true).sum())
		loss = model.loss(batch)
		loss.backward()
		optimizer.step()
		loss = loss.data.item()
		total_loss += loss * batch.batch_size
		total_examples += batch.batch_size
		pbar.update(batch.batch_size)
	pbar.close()
	return total_loss/total_examples, total_correct / total_examples

In [265]:
for epoch in range(1,args.num_epochs+1):
    avg_loss, acc = train(epoch)
    print(f'Epoch {epoch:02d}, Loss: {avg_loss:.4f}, Train accuracy: {acc:.4f}')

Epoch01: 100%|██████████| 2708/2708 [00:00<00:00, 7783.93it/s]


Epoch 01, Loss: 0.0006, Train accuracy: 1.0000


Epoch02: 100%|██████████| 2708/2708 [00:00<00:00, 6726.74it/s]


Epoch 02, Loss: 0.0006, Train accuracy: 1.0000


Epoch03: 100%|██████████| 2708/2708 [00:00<00:00, 8589.78it/s]


Epoch 03, Loss: 0.0005, Train accuracy: 1.0000


Epoch04: 100%|██████████| 2708/2708 [00:00<00:00, 6862.31it/s]


Epoch 04, Loss: 0.0005, Train accuracy: 1.0000


Epoch05: 100%|██████████| 2708/2708 [00:00<00:00, 9517.50it/s] 


Epoch 05, Loss: 0.0004, Train accuracy: 1.0000


Epoch06: 100%|██████████| 2708/2708 [00:00<00:00, 9254.08it/s]


Epoch 06, Loss: 0.0004, Train accuracy: 1.0000


Epoch07: 100%|██████████| 2708/2708 [00:00<00:00, 7349.95it/s]


Epoch 07, Loss: 0.0004, Train accuracy: 1.0000


Epoch08: 100%|██████████| 2708/2708 [00:00<00:00, 6572.46it/s]


Epoch 08, Loss: 0.0003, Train accuracy: 1.0000


Epoch09: 100%|██████████| 2708/2708 [00:00<00:00, 7383.14it/s]


Epoch 09, Loss: 0.0003, Train accuracy: 1.0000


Epoch10: 100%|██████████| 2708/2708 [00:00<00:00, 7988.20it/s]

Epoch 10, Loss: 0.0003, Train accuracy: 1.0000





In [258]:
from sklearn.metrics import f1_score

In [266]:
@torch.no_grad()
def test():
	model.eval()
	y_pred = model.inference(valid_loader, args.device).argmax(dim=-1).cpu()
	print(y_pred.shape)
	y_true = valid_loader.data.y.cpu()
	print(y_true.shape)
	logging.info("Validation Macro F1:" +  str(np.round(f1_score(y_true[valid_loader.data.test_mask], y_pred[valid_loader.data.test_mask], average="macro"), 6)))
	logging.info("Validation Micro F1:" +  str(np.round(f1_score(y_true[valid_loader.data.test_mask], y_pred[valid_loader.data.test_mask], average="micro"), 6)))

In [268]:
y_true = valid_loader.data.y.cpu()
y_true[valid_loader.data.test_mask].unique()

tensor([0, 1, 2, 3, 4, 5, 6])

In [269]:
y_true.unique()

tensor([0, 1, 2, 3, 4, 5, 6])

In [270]:
test()

2023-01-14 22:28:39,235 - INFO - Validation Macro F1:0.080546
2023-01-14 22:28:39,239 - INFO - Validation Micro F1:0.205


torch.Size([2708])
torch.Size([2708])
