In [79]:
import os.path as osp
import torch
from torch_geometric.data import Data, InMemoryDataset
import pickle
from tqdm import tqdm

In [99]:
class MyDataset(InMemoryDataset):
    """Sequence-graph dataset built from a pickled list of records.

    Every raw record is unpacked as ``(uid, poi, seq, coord, y)``. The sequence
    ``seq`` is turned into one graph:

    * each distinct element of ``seq`` becomes a node; ``x`` stores the
      original element ids in first-seen order;
    * each pair of consecutive elements becomes a directed edge in
      ``edge_index``, expressed in local node indices (positions into ``x``).
      Repeated transitions and self-loops are kept as they occur.

    ``uid``, ``poi``, ``y`` (one-element long tensors) and ``coord`` (float
    tensor) are attached to the graph unchanged.

    Args:
        root: Dataset root directory handed to ``InMemoryDataset``; the raw
            pickle is read from ``self.raw_dir`` and the processed file is
            written to ``self.processed_paths[0]``.
        sets: Split name. Selects ``{sets}.pkl`` as the raw input and
            ``seq_graph_{sets}.pt`` as the processed output.
        transform: Forwarded to ``InMemoryDataset``.
        pre_transform: Forwarded to ``InMemoryDataset`` and applied to every
            graph inside ``process`` before the dataset is saved.
    """

    def __init__(self, root='../processed_data/tky', sets='train', transform=None, pre_transform=None):
        # `sets` has to exist before the base constructor runs: the base class
        # resolves raw/processed file names (and may call `process`) from there.
        self.sets = sets
        super().__init__(root, transform, pre_transform)

        self.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        """Name of the raw pickle for the selected split."""
        return [f'{self.sets}.pkl']

    @property
    def processed_file_names(self):
        """Name of the processed file for the selected split."""
        return [f'seq_graph_{self.sets}.pt']

    def download(self):
        """No-op: nothing is downloaded, the raw pickle must already be in place."""
        pass

    def process(self):
        """Convert every raw record into a ``Data`` graph and save the list."""
        raw_path = osp.join(self.raw_dir, self.raw_file_names[0])
        with open(raw_path, 'rb') as f:
            # NOTE(security): unpickling can execute arbitrary code; only use
            # raw files that you produced yourself.
            records = pickle.load(f)
        # The previous version also unpickled a second object from this file
        # into two locals that were never used; that read has been dropped.

        graphs = []
        for uid, poi, seq, coord, y in tqdm(records):
            # Map each distinct element to a local index in first-seen order.
            # `len(local_index)` is evaluated before `setdefault` inserts, so a
            # new element receives the next free index.
            local_index = {}
            idx_seq = [local_index.setdefault(node, len(local_index)) for node in seq]

            graph = Data(
                x=torch.tensor(list(local_index), dtype=torch.long),
                # Consecutive pairs (step i -> step i+1); a sequence with fewer
                # than two steps yields an empty edge_index of shape [2, 0].
                edge_index=torch.tensor([idx_seq[:-1], idx_seq[1:]], dtype=torch.long),
                y=torch.tensor([y], dtype=torch.long),
                uid=torch.tensor([uid], dtype=torch.long),
                poi=torch.tensor([poi], dtype=torch.long),
                # torch.tensor (not the legacy torch.Tensor constructor) so the
                # values are always copied, never interpreted as a size.
                coord=torch.tensor(coord, dtype=torch.float),
            )

            # Honour the pre_transform accepted by __init__; it used to be
            # silently ignored.
            if self.pre_transform is not None:
                graph = self.pre_transform(graph)

            graphs.append(graph)

        self.save(graphs, self.processed_paths[0])

        
        

In [101]:
# Build the 'train' split; the progress output below comes from MyDataset.process() running.
train_set = MyDataset(root='../processed_data/tky', sets='train')

Processing...
100%|███████████████████████████████| 1138234/1138234 [05:53<00:00, 3216.58it/s]
Done!


In [102]:
# Build the 'test' split from the same root (raw file name: test.pkl).
test_set = MyDataset(root='../processed_data/tky', sets='test')

Processing...
100%|█████████████████████████████████████| 2293/2293 [00:00<00:00, 8516.12it/s]
Done!


In [103]:
# Build the 'val' split from the same root (raw file name: val.pkl).
val_set = MyDataset(root='../processed_data/tky', sets='val')

Processing...
100%|█████████████████████████████████████| 2293/2293 [00:00<00:00, 8892.08it/s]
Done!


In [88]:
from torch_geometric.loader import DataLoader

In [104]:
# Batches of 32 graphs with shuffling enabled. No random seed is set in the cells
# shown here, so the sampled batch inspected below will differ between runs.
train_loader = DataLoader(dataset=train_set, batch_size=32 , shuffle=True)

In [105]:
# Take the first batch from the loader and display its first 10 graphs.
sampled_data = next(iter(train_loader))
sampled_data[:10]

[Data(x=[44], edge_index=[2, 118], y=[1], uid=[1], poi=[1], coord=[2]),
 Data(x=[19], edge_index=[2, 83], y=[1], uid=[1], poi=[1], coord=[2]),
 Data(x=[51], edge_index=[2, 73], y=[1], uid=[1], poi=[1], coord=[2]),
 Data(x=[55], edge_index=[2, 85], y=[1], uid=[1], poi=[1], coord=[2]),
 Data(x=[22], edge_index=[2, 104], y=[1], uid=[1], poi=[1], coord=[2]),
 Data(x=[52], edge_index=[2, 99], y=[1], uid=[1], poi=[1], coord=[2]),
 Data(x=[33], edge_index=[2, 260], y=[1], uid=[1], poi=[1], coord=[2]),
 Data(x=[34], edge_index=[2, 59], y=[1], uid=[1], poi=[1], coord=[2]),
 Data(x=[25], edge_index=[2, 37], y=[1], uid=[1], poi=[1], coord=[2]),
 Data(x=[23], edge_index=[2, 65], y=[1], uid=[1], poi=[1], coord=[2])]

In [106]:
# Node features of the first graph: the original ids of its distinct sequence
# elements, in first-seen order (as built in MyDataset.process).
sampled_data[0].x

tensor([   36,    21,   924,   388,  1486,   312,  1429,  3449,    45,  6089,
         3379,  8225,   576,   679,  8962,   159,   563,  1188,  2304,  2323,
         2263,  3340, 17952,    27,   247, 10194, 17124,  1375, 21909,  1338,
         1393,  5545,   177,  2618, 23223, 23560,   474, 12400, 23774,   799,
        26076,   480,  1726,   353])

In [107]:
# Edges of the first graph: consecutive sequence steps, expressed as local node
# indices (positions into x); repeated transitions and self-loops are kept.
sampled_data[0].edge_index

tensor([[ 0,  1,  2,  3,  4,  5,  5,  5,  6,  5,  5,  0,  5,  5,  0,  7,  8,  9,
         10,  5,  0,  5,  5,  0, 11,  0,  5,  5,  0,  1, 12, 13,  1, 14,  5,  5,
          5,  0,  0,  5,  5,  0,  5,  5,  1,  7,  5, 15, 16, 17,  1,  1,  6, 18,
         19, 20,  5,  0,  0,  5,  5,  0,  5,  1,  6, 18, 21,  5,  5,  0, 22, 22,
         23,  5, 24,  0, 25,  5, 26, 27, 28, 29, 30, 31,  5,  0,  0,  5,  5,  5,
          5,  5, 32, 33, 34,  5,  5, 35,  5,  1, 36, 37, 38, 23, 18, 39,  0,  5,
         40, 41,  5,  0,  5,  0,  1,  7, 42, 11],
        [ 1,  2,  3,  4,  5,  5,  5,  6,  5,  5,  0,  5,  5,  0,  7,  8,  9, 10,
          5,  0,  5,  5,  0, 11,  0,  5,  5,  0,  1, 12, 13,  1, 14,  5,  5,  5,
          0,  0,  5,  5,  0,  5,  5,  1,  7,  5, 15, 16, 17,  1,  1,  6, 18, 19,
         20,  5,  0,  0,  5,  5,  0,  5,  1,  6, 18, 21,  5,  5,  0, 22, 22, 23,
          5, 24,  0, 25,  5, 26, 27, 28, 29, 30, 31,  5,  0,  0,  5,  5,  5,  5,
          5, 32, 33, 34,  5,  5, 35,  5,  1, 36, 37, 38, 23

In [108]:
# coord of the first graph: two floats (looks like latitude/longitude — confirm against the raw data).
sampled_data[0].coord

tensor([ 35.6556, 139.7567])