In [1]:
%load_ext autoreload
%autoreload 2
import torch
import json
import numpy as np
from torch.utils.data import DataLoader
import torch.optim as optim
from torch_geometric.loader import NeighborLoader
from torch_geometric.data import Data
from ffm_graph import *
from data import MINDDataset
from data_utils import *
from transformers import BertConfig
from gnn import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Dataset built from 'behaviors.pkl' (presumably the MIND impression logs —
# confirm against data.py).
mind_dataset = MINDDataset('behaviors.pkl')

# Batches of 4 samples are assembled by `collate_fn`, using 2 worker processes.
# NOTE(review): `shuffle` is not set, so batches follow dataset order —
# confirm this is intended for training.
train_dataloader = DataLoader(
    mind_dataset,
    batch_size=4,
    num_workers=2,
    collate_fn=collate_fn,
)

In [3]:
# News-side encoder, configured from a BERT-style JSON config file.
news_encoder_config = BertConfig.from_json_file('news_encoder.json')
news_encoder = NewsEncoder(news_encoder_config)

# User-side encoder, configured the same way from its own JSON file.
user_encoder_config = BertConfig.from_json_file('user_encoder.json')
user_encoder = UserEncoder(user_encoder_config)

# Graph network from `create_sage`; the exact architecture lives in gnn.py.
gnn = create_sage(
    nfeat=256,
    nhid=256,
    nlayer=3,
    dropout=0.1,
)

In [4]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA instead of crashing.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Combine both encoders and the graph network into one model on `device`.
model = Fastformer_Graph(news_encoder, user_encoder, gnn).to(device)

In [5]:
def train_model(model, train_loader, global_graph_data, device, epochs=5,
                lr=1e-4, num_neighbors=None):
    """Train `model` with AdamW and cross-entropy, sampling a sub-graph per batch.

    For every batch, a neighbourhood sub-graph around ``batch['seed_nodes']``
    is sampled from `global_graph_data` with a `NeighborLoader`, and the model
    is called as ``model(batch, sub_graph, device)``. The returned scores are
    compared against ``batch['label']``.

    Args:
        model: Module whose parameters are optimised. It is moved to `device`.
        train_loader: Iterable of dict batches. Every value must support
            ``.to(device)``; the keys ``'seed_nodes'`` and ``'label'`` are
            required.
        global_graph_data: Graph object handed to `NeighborLoader` as the
            sampling source.
        device: Device on which the model, batch and sub-graph are placed.
        epochs: Number of passes over `train_loader`.
        lr: AdamW learning rate (default is the previously hard-coded 1e-4).
        num_neighbors: Per-hop fan-out passed to `NeighborLoader`. ``None``
            keeps the previous hard-coded ``[10, 10, 10]``.

    Returns:
        None. The average loss of each epoch is printed.
    """
    if num_neighbors is None:
        num_neighbors = [10, 10, 10]

    optimizer = optim.AdamW(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()

    model.to(device)

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        num_batches = 0

        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            # Take the seed indices on the CPU before the batch goes to
            # `device`, so they are not copied to the GPU and straight back.
            seed_nodes = batch['seed_nodes'].cpu()
            batch = {k: v.to(device) for k, v in batch.items()}

            # A loader is built per batch because the seed nodes change with
            # every batch; only its first (and only) mini-batch is used.
            sub_loader = NeighborLoader(
                global_graph_data,
                num_neighbors=num_neighbors,
                input_nodes=seed_nodes,
                batch_size=len(seed_nodes),
                shuffle=False
            )
            sub_graph = next(iter(sub_loader)).to(device)

            optimizer.zero_grad()

            scores = model(batch, sub_graph, device)
            loss = criterion(scores, batch['label'])

            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # Count batches actually seen instead of calling len(train_loader):
        # this works for loaders without __len__ and avoids dividing by zero
        # when the loader is empty.
        avg_loss = total_loss / max(num_batches, 1)
        # Report the 1-based epoch number so it matches the progress bar.
        print(f'Epoch {epoch + 1} done! AVG loss: {avg_loss:.4f}')

In [6]:
# Node features: array loaded from news_token.npy (presumably token ids per
# news article — confirm), converted to an int64 tensor.
news_tokens = np.load('data/MINDsmall_train/news_token.npy')
x = torch.from_numpy(news_tokens).to(torch.long)

# Graph connectivity saved earlier with torch.save.
# NOTE(review): torch.load unpickles the file — only load trusted files.
edge_index = torch.load('edge_index.pt')

# Single graph object that the neighbour sampler draws sub-graphs from.
global_graph_data = Data(edge_index=edge_index, x=x)

In [7]:
# Smoke test: sample the neighbourhood sub-graph for one training batch.
batch = next(iter(train_dataloader))

# Take the seed indices on the CPU before the batch is moved to `device`,
# so they are not copied to the GPU and straight back again.
seed_nodes = batch['seed_nodes'].cpu()
batch = {k: v.to(device) for k, v in batch.items()}

sub_loader = NeighborLoader(
    global_graph_data,
    # NOTE(review): train_model samples [10, 10, 10]; confirm that two hops
    # is intended here (e.g. to reduce memory) and not an oversight.
    num_neighbors=[10, 10],
    input_nodes=seed_nodes,
    batch_size=len(seed_nodes),
    shuffle=False
)
sub_graph = next(iter(sub_loader)).to(device)

❌ Có lỗi rồi đại vương ơi!
Lỗi: CUDA out of memory. Tried to allocate 5.79 GiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 3.57 GiB is allocated by PyTorch, and 1.24 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)


In [9]:
# Run a single training epoch over the full loader.
train_model(model, train_dataloader, global_graph_data, device, epochs=1)

Epoch 1:   0%|          | 0/59086 [00:09<?, ?it/s]


KeyboardInterrupt: 