In [31]:
import os
import torch
import pandas as pd
import torch.nn as nn
from Config import Config
from lightning import LightningModule
from utils import create_neg_sample
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from dataloader import CustomGraphDataset

In [24]:
## Steps to do
## TODO load the KG dataset
## TODO Make training/valid data using positive and negative samples
## TODO Convert to index (For embedding)
## TODO Initialize the embedding layer for nodes and edges
## TODO Loss function definition
## TODO Build nn module 
## TODO Train and test model

In [32]:
## Dummy dataset
# Column-oriented toy graph: position i of the three lists together forms
# one (Head, Relation, Tail) row of the resulting frame.
dummy_graph_dict = {
    "Head": [0, 1, 2, 3, 3, 4],
    "Relation": [2, 1, 1, 2, 2, 1],
    "Tail": [1, 2, 0, 5, 4, 5],
}
dummy_graph_df = pd.DataFrame(dummy_graph_dict)
dummy_graph_df.head()

Unnamed: 0,Head,Relation,Tail
0,0,2,1
1,1,1,2
2,2,1,0
3,3,2,5
4,3,2,4


In [35]:

# Build the negative samples first (create_neg_sample receives the frame
# before the "label" column is attached), then tag positives with +1 and
# negatives with -1.
negative_sample_graph_df = create_neg_sample(dummy_graph_df)
dummy_graph_df["label"] = 1
negative_sample_graph_df["label"] = -1

# Stack positives on top of negatives and renumber the rows 0..n-1.
dataset_df = pd.concat(
    [dummy_graph_df, negative_sample_graph_df],
    axis=0,
    ignore_index=True,
)
dataset_df.head(10)

Unnamed: 0,Head,Relation,Tail,true_label,label
0,0,2,1,1.0,1
1,1,1,2,1.0,1
2,2,1,0,1.0,1
3,3,2,5,1.0,1
4,3,2,4,1.0,1
5,4,1,5,1.0,1
6,5,1,1,,-1
7,5,1,4,,-1
8,4,2,2,,-1
9,1,2,0,,-1


In [39]:
# Hold out 20% of the labelled triples (fixed seed for a repeatable split)
# and renumber every resulting piece from 0 so each split has its own
# contiguous index.
X_train, X_test, y_train, y_test = (
    split_part.reset_index(drop=True)
    for split_part in train_test_split(
        dataset_df[["Head", "Relation", "Tail"]],
        dataset_df["label"],
        test_size=0.2,
        random_state=42,
    )
)


print(X_train, X_test, y_train, y_test)

   Head  Relation  Tail
0     4         2     2
1     4         1     5
2     2         1     0
3     1         1     2
4     3         1     0
5     3         2     4
6     5         1     4
7     3         2     5
8     5         1     1    Head  Relation  Tail
0     0         1     3
1     1         2     0
2     0         2     1 0   -1
1    1
2    1
3    1
4   -1
5    1
6   -1
7    1
8   -1
Name: label, dtype: int64 0   -1
1   -1
2    1
Name: label, dtype: int64


In [38]:
# Wrap each split in the project's dataset class so DataLoader can batch it.
train_dataset = CustomGraphDataset(X_df=X_train, Y_df=y_train)
valid_dataset = CustomGraphDataset(X_df=X_test, Y_df=y_test)

# Training: reshuffle every epoch and drop the ragged final batch so every
# optimisation step sees a full batch.
train_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    drop_last=True,
)
# Validation: keep a fixed order and keep the final partial batch. With
# drop_last=True an odd-sized validation split (e.g. 3 rows at batch_size=2)
# would silently lose a sample on every evaluation pass.
valid_loader = DataLoader(
    valid_dataset,
    batch_size=2,
    shuffle=False,
    drop_last=False,
)


Unnamed: 0,Head,Relation,Tail
8,4,2,2
5,4,1,5
2,2,1,0
1,1,1,2
11,3,1,0


In [None]:
class TranslationalDistanceModel(LightningModule):
    """Translational-distance scoring model over learned node/edge embeddings.

    Holds one embedding table for nodes and one for edges, sized from
    ``config``. ``forward`` currently implements only the "TransE" score,
    ``|| h + r - t ||_2``.
    """

    def __init__(self, config: Config):
        """Create the node and edge embedding tables.

        Args:
            config: Project configuration. This class reads ``node_count``,
                ``node_embedding_size``, ``edge_count``,
                ``edge_embedding_size`` and ``model`` from it.
        """
        super().__init__()
        self.config = config
        ### Embedding layers
        self.node_embedding = nn.Embedding(
            self.config.node_count,
            self.config.node_embedding_size,
            #padding_idx=self.config.X_padding_idx,
        )
        self.edge_embedding = nn.Embedding(
            self.config.edge_count,
            self.config.edge_embedding_size,
            #padding_idx=self.config.X_padding_idx,
        )

    def forward(self, head, relation, tail, Y_label = None):
        """Score a batch of (head, relation, tail) triples.

        Args:
            head: Integer tensor of head-node ids (assumed shape (bs,) or
                (bs, 1) -- TODO confirm against the dataset's output).
            relation: Integer tensor of edge ids, same shape as ``head``.
            tail: Integer tensor of tail-node ids, same shape as ``head``.
            Y_label: Accepted for interface compatibility; not used here.

        Returns:
            Tensor with the same shape as the id inputs holding the L2
            distance of each triple (lower means more plausible).

        Raises:
            NotImplementedError: If ``config.model`` is not "TransE".
        """
        if self.config.model == "TransE":
            # Look the ids up in the embedding tables; the arithmetic below
            # must run on embedding vectors, not on the raw integer ids.
            head_emb = self.node_embedding(head)
            relation_emb = self.edge_embedding(relation)
            tail_emb = self.node_embedding(tail)

            # h + r - t requires node and edge embeddings of equal width.
            difference = head_emb + relation_emb - tail_emb

            # Reduce over the embedding axis (last dim) so each triple gets
            # its own score; reducing over dim 0 would mix the batch together.
            return torch.linalg.vector_norm(difference, ord=2, dim=-1)

        # Fail loudly instead of implicitly returning None for other models.
        raise NotImplementedError(
            f"Unsupported model type: {self.config.model!r}"
        )


